gen.div <- read.delim(paste0(folder_path, 'genetic_diversity/GenDiv_subsample5Ind.txt')) %>%
  filter(EstPi > 0, !outPi %in% 'out', !outTheta %in% 'out') 

##load phylogenies and speciation rates from 100 posterior trees + MCC tree
TreeRatesSet <- readRDS(paste0(folder_path,'speciation_rate/output/MCCposterior100_treeMAPS.rds'))

treeMCC <- TreeRatesSet[["treeMCC"]][["tree"]]
ratesMCC <- TreeRatesSet[["treeMCC"]][["rates"]][!is.na(names(TreeRatesSet[["treeMCC"]][["rates"]]))][-c(1:4)] ##same as MAPS[5:(nedges+4)]

parClaDS <- tibble(rates = purrr::map(TreeRatesSet,'rates')) %>% 
  hoist(rates, 
        sigma = "sigma", 
        alpha = "alpha", 
        epsilon = "epsilon", 
        lambda_0 = "lambda_0") %>% 
  mutate(set = names(TreeRatesSet)) %>%
  select(-rates)

# load(paste0(folder_path,'speciation_rate/output/upham_4064sp_FR_MCCposterior100.rdata')) #loads TreeSet

tipLengths <- cbind.data.frame(edge = treeMCC$edge, 
                               time = treeMCC$edge.length) %>%
  filter(edge.2 <= Ntip(treeMCC)) %>%
  mutate(species = treeMCC$tip.label,
         nodes_sister = ifelse(edge.1 %in% edge.1[duplicated(edge.1)], "sister","nonSister"))

clades <- read.delim(paste0(folder_path,'speciation_rate/input/MamPhy_5911sp_tipGenFamOrdCladeGenesSampPC.txt')) %>%
  drop_na(PC) %>%
  mutate(species = word(tiplabel, 1,2, sep = "_"),
         clades = sub("^PC\\d+_",  "", PC)) %>%
  select(species, clades, ord, fam) %>%
  filter(species %in% treeMCC$tip.labels)

changeClades <- select(clades, clades) %>%
  distinct() %>%
  arrange(clades) %>%
  filter(clades %in% c("Cetartiodactyla", "EmballonuridRelated",
                       "GuineaPigRelated","Marsupials", "PhyllostomidRelated",
                       "SquirrelRelated","VespertilionidRelated")) %>%
  mutate(newNames = c("Artiodactyla", "Emballonuroidea", 
                      "Guinea Pig-related","Marsupialia", "Noctilionoidea",
                      "Squirrel-related", "Vespertilionoidea")) 

Nodes <- read.delim(paste0(folder_path,'speciation_rate/Upham_FBD_clades_nodes.txt')) 

### to include the clades that had fewer species as an arc in the
for(clade in unique(clades$clades)[!unique(clades$clades) %in% Nodes$clades]){
  a <- as.character(clades[clades$clades %in% clade,'species'])
  cladeNode <- ape::getMRCA(treeMCC, a)
  if(!is.null(cladeNode)){
    Nodes <- bind_rows(Nodes, data.frame(clades = clade, node = cladeNode))
  }
}

spRate <- read.delim(paste0(folder_path,'speciation_rate/output/MCCposterior100_tipRate.txt')) %>% 
  rename(set = treeN ) %>% 
  left_join(., clades, by = 'species') 

gendivSpRate <- spRate %>% 
  left_join(., select(gen.div, species, EstPi, subPi_mean, EstTheta, subTheta_mean), by = 'species') 

traitData <- read.delim(paste0(folder_path,'traits/matchedTraits.txt'), stringsAsFactors = FALSE) %>%
  select(-Family) %>%
  mutate(mean_temp = mean_temp + abs(min(mean_temp, na.rm = T)) + 1) 

gendivSpRateTrait <- gendivSpRate %>%
  left_join(., traitData, by = 'species') 

MutRateAll <- readRDS(paste0(folder_path, 'mutationRate/output/mutationRate_cytb3rdcodonPAML.rds')) 

gendivSpRateMutRate <- gendivSpRateTrait  %>%
  left_join(., MutRateAll, by = c('set','species')) %>%
  select(-mutrate) %>% ## mutation rate per Mya and not per year
  mutate(timeyear = time * 1000000,
         GenerationLength_y = GenerationLength_d/365,
         timeGeneration = timeyear/GenerationLength_y,
         mutRate = expNsub / timeGeneration,
         Ne = EstPi / mutRate) %>%
  arrange(set)

### Load PGLS results
PGLSglobal0 <- readRDS(paste0(folder_path,'pgls/global/global_gendivSpRate_PGLSresults.rds'))
PGLSglobalsub <- readRDS(paste0(folder_path,'pgls/global/global_gendivSpRate_subsample6Ind_PGLSresults.rds'))
PGLSglobal <- bind_rows(PGLSglobal0, PGLSglobalsub) %>%
  rename(Term = term)

PGLSclade0 <- readRDS(paste0(folder_path,'pgls/clade/clade_gendivSpRate_PGLSresults.rds'))
PGLScladesub <- readRDS(paste0(folder_path,'pgls/clade/clade_gendivSpRate_subsample6Ind_PGLSresults.rds'))
PGLSclade <- bind_rows(PGLSclade0, PGLScladesub) %>%
  rename(Term = term)

PGLSglobal_traits <- readRDS(paste0(folder_path,'pgls/global_traits/global_gendivSpRateTraits_PGLSresults.rds')) %>%
  rename(Term = term)

PGLSglobal_mutRate <- readRDS(paste0(folder_path,'pgls/global_mutRate/global_gendivSpRateMutRate_PGLSresults.rds')) %>%
  rename(Term = term)

### Load BMLM results
BMLMglobal0 <- readRDS(paste0(folder_path,'bmlm/global/global_allPosterior.rds'))
BMLMglobalsub <- readRDS(paste0(folder_path,'bmlm/global/global_allPosterior_subsample6Ind.rds'))
BMLMglobal <- c(BMLMglobal0, BMLMglobalsub) 

BMLMglobal_traits <- readRDS(paste0(folder_path,'bmlm/global_traits/global_traits_allPosterior.rds'))

BMLMglobal_mutRate <- readRDS(paste0(folder_path,'bmlm/global_mutRate/global_mutRate_allPosterior.rds'))

colSet <- data.frame(clades = unique(PGLSclade$clade),
                     col = iwanthue(length(unique(PGLSclade$clade))))

cladesSum <- data.frame(table(clades$clades), 
                        stringsAsFactors = F) %>%
  rename(clades = Var1) %>%
  left_join(colSet) 

Figure 1

### code from Odile plot_treePi.R prepare the data ####
tipRatesMCC <- spRate %>%
    filter(set %in% "treeMCC") %>%
    pull(tipRate)

sub_tree = treeMCC
sub_rates = ratesMCC
rep = map_rates_tipNroot(sub_tree, sub_rates = sub_rates, treeMCC, rates = rep(NaN,
    nrow(treeMCC$edge)))
new_rates = rep$rates
# clades_root = as.numeric(c(Nodes$node))
clades_root = as.numeric(c(Nodes[Nodes$clades %in% unique(PGLSclade$clade), "node"]))
clades_tips = list()
clades_edges = list()

for (i in unique(Nodes$clades)) {
    # unique(Nodes[Nodes$clades %in% unique(PGLSclade$clade),'clades'])
    clades_tips[[i]] = getDescendants(sub_tree, Nodes[Nodes$clades %in% i, "node"])
}

names(clades_tips)[names(clades_tips) %in% changeClades$clades] <- na.omit(changeClades[match(names(clades_tips),
    changeClades$clades), 2])

pglsSet <- unique(PGLSclade$clade)
pglsSet[pglsSet %in% changeClades$clades] <- na.omit(changeClades[match(pglsSet,
    changeClades$clades), 2])


#### a few options ####
save_pdf = F

v1_name = "EstPi"
col1_nbins = 100
col1_offset = 20
col1_pal = "YlOrRd"
D = 0.6
L1 = 0.7

v2_name = "EstTheta"
col2_nbins = 100
col2_offset = 5
col2_pal = "YlGnBu"
L2 = 0.7

vertical = 1
#### start pdf ####

if (vertical == 1) {
    if (save_pdf)
        pdf(paste0(fig_path, "Figure1.pdf"), width = 18, height = 14)
    # layout(matrix(c(1,1,2,3), ncol = 2 , byrow = T), heights = c(3,1))
    layout(matrix(c(1, 2, 1, 3), ncol = 2, byrow = T), widths = c(3, 1))

} else if (vertical == 2) {
    if (save_pdf)
        pdf(paste0("~/Desktop/Figure1_treeSpRateGenDiv.pdf"), width = 12, height = 15)
    # layout(matrix(c(1,1,2,3), ncol = 2 , byrow = T), heights = c(3,1))
    layout(matrix(c(1, 1, 3, 2), ncol = 2, byrow = T), heights = c(3, 1))

} else if (vertical == 3) {
    if (save_pdf)
        pdf(paste0("~/Desktop/Figure1_treeSpRateGenDiv.pdf"), width = 12, height = 14)
    # layout(matrix(c(1,1,2,3), ncol = 2 , byrow = T), heights = c(3,1))
    layout(matrix(c(1, 1, 2, 3), ncol = 2, byrow = T), heights = c(3, 0.6))

} else if (vertical == 4) {
    if (save_pdf)
        pdf(paste0("~/Desktop/Figure1_treeSpRateGenDiv.pdf"), width = 12, height = 15)
    # layout(matrix(c(1,1,2,3), ncol = 2 , byrow = T), heights = c(3,1))
    layout(matrix(c(2, 1, 3), ncol = 1, byrow = T), heights = c(0.6, 3, 0.6))

} else if (vertical == 5) {
    if (save_pdf)
        pdf(paste0("~/Desktop/Figure1_treeSpRateGenDiv.pdf"), width = 12, height = 15)
    # layout(matrix(c(1,1,2,3), ncol = 2 , byrow = T), heights = c(3,1))
    layout(matrix(c(4, 2, 1, 1, 3, 5), ncol = 2, byrow = T), heights = c(0.6, 3,
        0.6))

}

#### plot the tree #####
par(mar = c(2, 1, 2, 0))
plot_tree = treeMCC
plot_tree$tip.label[] = ""
plot_tree$tip.label[1] = "........"  # to make the tree smaller, long invisible tip label was added
tipcol = rgb(255, 255, 255, alpha = 255, maxColorValue = 255)
col_tree = invisible(plot.with.rate.withNaNs(plot_tree, c(new_rates), NaN_color = "gray90",
    leg = F, same.scale = T, lwd = 2, log = T, show.tip.label = T, tip.color = tipcol))

#### collect tips coordinates ####
lastPP <- get("last_plot.phylo", envir = .PlotPhyloEnv)
tip <- 1:lastPP$Ntip
XX <- lastPP$xx  #[tip]
YY <- lastPP$yy  #[tip]

#### chose tip lines colors ####
col_v1 = c(colorRampPalette((brewer.pal(n = 8, name = col1_pal)))(col1_offset + col1_nbins))  # color palette
v1 = c()
for (i in treeMCC$tip.label) {
    if (i %in% gen.div$species) {
        v1 = c(v1, as.numeric(gen.div[gen.div$species == i, v1_name]))
    } else {
        v1 = c(v1, NA)
    }
}
v1_transformed = log(v1 + 0.005)
range_v1 = range(v1_transformed, na.rm = T)
lab_v1 = c(0, 0.01, 0.02, 0.05, 0.1)
at_v1 = log(lab_v1 + 0.005)
col1 = col_v1[round(((v1_transformed - range_v1[1])/(range_v1[2] - range_v1[1])) *
    (col1_nbins - 1)) + col1_offset + 1]

#### add them to the tree ####
line_position = D + c(0, L1)
total_depth = sqrt(XX[1]^2 + YY[1]^2)
add2depth = 0.05 * total_depth
for (i in 1:length(treeMCC$tip.label)) {
    if (!is.na(col1[i])) {
        addx = add2depth * line_position * XX[i]/total_depth
        addy = add2depth * line_position * YY[i]/total_depth

        lines(XX[i] + addx, YY[i] + addy, col = col1[i], lwd = 3, xpd = T)
    }
}

#### chose tip lines colors / 2nd measure ####
col_v2 = c("#FFFFFF", colorRampPalette((brewer.pal(n = 8, name = col2_pal)))(col2_offset +
    col2_nbins))  # color palette
v2 = c()
for (i in treeMCC$tip.label) {
    if (i %in% gen.div$species) {
        v2 = c(v2, as.numeric(gen.div[gen.div$species == i, v2_name]))
    } else {
        v2 = c(v2, NA)
    }
}
v2_transformed = log(v2 + 0.005)
range_v2 = range(v2_transformed, na.rm = T)
lab_v2 = c(0, 0.01, 0.02, 0.05, 0.1)
at_v2 = log(lab_v2 + 0.005)
col2 = col_v2[round(((v2_transformed - range_v2[1])/(range_v2[2] - range_v2[1])) *
    (col2_nbins - 1)) + col2_offset + 1]  #### add them to the tree

line_position = D + L1 + 0.05 + c(0, L2)
total_depth = sqrt(XX[1]^2 + YY[1]^2)
add2depth = 0.05 * total_depth

for (i in 1:length(treeMCC$tip.label)) {
    if (!is.na(col2[i])) {
        addx = add2depth * line_position * XX[i]/total_depth
        addy = add2depth * line_position * YY[i]/total_depth

        lines(XX[i] + addx, YY[i] + addy, col = col2[i], lwd = 3, xpd = T)
    }
}

# if (save_pdf) dev.off() legends #### image.plot(z = range_v1,col =
# col_v1[-col1_offset-1], horizontal=F,legend.only = T, legend.mar = 12,
# legend.shrink\t= 0.3,axis.args=list( at=at_v1, labels=lab_v1))#,)
# image.plot(z = range_v2,col = col_v2[-col2_offset-1],
# horizontal=F,legend.only = T, legend.mar = 6, legend.shrink\t=
# 0.3,axis.args=list( at=at_v2, labels=lab_v2))#,axis.args=list( at=log(ticks),
# labels=ticks))#### dots at roots

### plot nodes of PGLS clades
for (i in clades_root) {
    points(XX[i], YY[i], col = "orangered4", bg = "orangered3", pch = 21, cex = 0.8)
}

#### circular arcs for analysed clades ####
l = D + 0.05 + L1 + L2 + D
nCT = length(clades_tips)
# nCT = length(unique(PGLSclade$clade))
clades_names = unique(names(clades_tips))
radius = total_depth + add2depth * (l + D)
add2text = 0.9
further = rep(0, nCT)
further[which(clades_names %in% c("Lagomorpha", "Squirrel-related"))] = add2text
set.seed(1)
# grays = paste0('gray', sample(20:80,nCT)) grays[1] = 'red'

grays = Nodes[, c("clades", "node")] %>%
    # filter(clades %in% unique(PGLSclade$clade)) %>%
mutate(ord = seq(1:nCT)) %>%
    arrange(desc(node)) %>%
    left_join(., cladesSum, by = "clades") %>%
    # mutate(col = rep(paste0('gray',c('20','80')),nCT)[1:nCT]) %>%
arrange(ord) %>%
    pull(col)

for (i in 1:nCT) {
    tips = sort(clades_tips[[i]][clades_tips[[i]] <= (sub_tree$Nnode + 1)])
    lt = length(tips)
    tips = tips[-((lt - 2):lt)]  ###why is not working for every clade with more than one species?
    tips = tips[-(1:2)]

    xs = XX[tips]
    ys = YY[tips]
    xs = xs + add2depth * l * xs/total_depth
    ys = ys + add2depth * l * ys/total_depth

    colGray <- ifelse(as.character(clades_names[i]) %in% pglsSet, grays[i], "gray90")
    lines(xs, ys, lwd = 3, col = colGray)

    lt = length(tips)

    if (lt > 1) {
        cl = T

        angle_i = acos(xs[floor(lt/2)]/(total_depth + add2depth * l))
        if (ys[floor(lt/2)] < 0) {
            angle_i = -1 * angle_i
            cl = F
        }

        if (as.character(clades_names[i]) %in% pglsSet) {
            arctext(as.character(clades_names[i]), radius = radius + further[i] *
                add2depth, middle = angle_i, col = grays[i], clockwise = cl, cex = 1.5,
                xpd = T)
        }
        # arctext(as.character(clades_names[i]), radius = radius +
        # further[i]*add2depth, middle = angle_i, col = 'gray20', clockwise =
        # cl)
    }
}


#### first densplot ####

clades_tips2 <- clades_tips[names(clades_tips) %in% pglsSet]

rangeX = range(XX)
rangeY = range(YY)

relative_width = 0.4
relative_heigth = 0.35

relative_rangeX = relative_width * rangeX
relative_rangeY = relative_width * rangeY

xrel = function(x, rrX = relative_rangeX, from = 0, to = 1) {
    newx = from + (to - from) * (x - min(x))/diff(range(x))
    newx = newx * diff(rrX) + min(rrX)
    return(newx)
}

yrel = function(x, rrX = relative_rangeY, from = 0, to = 1) {
    newx = from + (to - from) * (x - min(x))/diff(range(x))
    newx = newx * diff(rrX) + min(rrX)
    return(newx)
}

polygon(relative_rangeX[c(1, 1, 2, 2)], relative_rangeY[c(1, 2, 2, 1)], border = NA,
    col = adjustcolor("white", alpha.f = 0.8))

# plot the density
lower_y = 0.25
densclads = lapply(clades_tips2, function(vect) {
    density(log(ratesMCC[sapply(vect, function(x) {
        which(treeMCC$edge[, 2] == x)
    })]))
})
dens_all = density(log(tipRatesMCC))  #new_rates for all rates and tipRatesMCC for just tip rates
maxy = max(dens_all$y)
for (i in 1:length(densclads)) {
    maxy = max(max(densclads[[i]]$y), maxy)
}

newlower_y = yrel(c(0, lower_y, 1))[2]
for (i in 1:length(densclads)) {
    lines(xrel(c(range(dens_all$x), densclads[[i]]$x))[-c(1:2)], yrel(c(maxy, densclads[[i]]$y),
        from = lower_y)[-1], lwd = 2, col = grays[i])
}

polygon(xrel(dens_all$x)[c(1, 1:length(dens_all$x), length(dens_all$x))], c(newlower_y,
    yrel(c(maxy, dens_all$y), from = lower_y)[-1], newlower_y), border = NA, col = adjustcolor("gray80",
    alpha.f = 0.5))
lines(xrel(dens_all$x), yrel(c(maxy, dens_all$y), from = lower_y)[-1], lwd = 3)
# ad the color legend
axis_y = 0.15

col_x = xrel(c(range(diff(log(ratesMCC))), min(dens_all$x) + c(diff(range(dens_all$x)) *
    (0:length(col_tree$col))/length(col_tree$col))))[-c(1, 2)]
col_y = yrel(c(0, 1), from = axis_y + 0.005, to = lower_y - 0.05)
for (i in 1:length(col_tree$col)) {
    polygon(col_x[c(i, i + 1)][c(1, 1, 2, 2)], col_y[c(1, 2, 2, 1)], border = col_tree$col[i],
        col = col_tree$col[i])
}

# add the axis
lines(relative_rangeX, yrel(c(0, 0, 1), from = axis_y)[1:2], lwd = 2)
lab = c(0.03, 0.05, 0.1, 0.2, 0.5, 1)
ticks = xrel(sort(c(range(dens_all$x), log(lab))))
text(0, relative_rangeY[1], expression(Speciation ~ rates ~ "(" ~ Myr^"-1" ~ ")"))
for (i in 2:(1 + length(lab))) {
    lines(ticks[c(i, i)], yrel(c(0, 1), from = axis_y - 0.015, to = axis_y + 0.015),
        lwd = 1.5)
    text(labels = lab[i - 1], x = ticks[i], y = yrel(c(0, 1), from = axis_y - 0.06,
        to = axis_y + 0.015)[1])
}


#### densities for Pi ####

par(mar = c(2, 0, 2, 1))

plot(1000, xlim = c(-1, 1), ylim = c(-1, 1), axes = F, xlab = "", ylab = "")
lower_y = 0.3

#### First measure ####

relative_rangeX = c(0, 1)
relative_rangeY = c(-1, 1) * 0.9
dens_all1 = density(v1_transformed, na.rm = T)
dens_all2 = density(v2_transformed, na.rm = T)

ry = range(c(dens_all1$x, dens_all2$x, -6.5, -1))
xrel = function(x, rrX = relative_rangeX, from = 0, to = 1) {
    newx = from + (to - from) * (x - min(x))/diff(range(x))
    newx = newx * diff(rrX) + min(rrX)
    return(newx)
}

yrel = function(x, rrX = relative_rangeY, from = 0, to = 1, My = ry[2], my = ry[1]) {
    newx = from + (to - from) * (x - my)/(My - my)
    newx = newx * diff(rrX) + min(rrX)
    return(newx)
}


dens_all = density(v1_transformed, na.rm = T)
# plot the density
densclads = lapply(clades_tips2, function(vect) {
    density(v1_transformed[vect[vect < 4065]], na.rm = T)
})
maxy = max(dens_all$y)
for (i in 1:length(densclads)) {
    maxy = max(max(densclads[[i]]$y), maxy)
}

newlower_y = xrel(c(0, lower_y, 1))[2]
for (i in 1:length(densclads)) {
    lines(y = yrel(c(range(dens_all$x), densclads[[i]]$x))[-c(1:2)], x = -xrel(c(maxy,
        densclads[[i]]$y), from = lower_y)[-1], lwd = 2, col = grays[i])
}

polygon(y = yrel(dens_all$x)[c(1, 1:length(dens_all$x), length(dens_all$x))], x = -c(newlower_y,
    xrel(c(maxy, dens_all$y), from = lower_y)[-1], newlower_y), border = NA, col = adjustcolor("gray80",
    alpha.f = 0.5))
lines(y = yrel(dens_all$x)[c(1, 1:length(dens_all$x), length(dens_all$x))], x = -c(newlower_y,
    xrel(c(maxy, dens_all$y), from = lower_y)[-1], newlower_y), lwd = 3)

# ad the color legend
axis_y = 0.15
#
c1 = col_v1
# rc1 = range(round(( (c(v1_transformed) - range_v1[1]) /
# (range_v1[2]-range_v1[1]))*(col1_nbins-1) )+col1_offset+1, na.rm = T)
col_x = yrel(range_v1[1] + diff(range_v1) * (0:length(c1))/length(c1))
# col_x = col_x[rc1[1]:min(length(c1),rc1[2])] c1 =
# c1[rc1[1]:min(length(c1),rc1[2])]
col_y = xrel(c(0, 1), from = axis_y + 0.005, to = lower_y - 0.05)
for (i in 1:length(c1)) {
    polygon(y = col_x[c(i, i + 1)][c(1, 1, 2, 2)], x = -col_y[c(1, 2, 2, 1)], border = c1[i],
        col = c1[i])
}
polygon(y = c(relative_rangeY[1], col_x[1])[c(1, 1, 2, 2)], x = -col_y[c(1, 2, 2,
    1)], border = c1[1], col = c1[1])
lx = length(c1)
polygon(y = c(relative_rangeY[2], col_x[lx])[c(1, 1, 2, 2)], x = -col_y[c(1, 2, 2,
    1)], border = c1[lx], col = c1[lx])
# # add the axis lines(relative_rangeX, yrel(c(0,0,1), from = axis_y)[1:2], lwd
# = 2) lab = c(0.03,0.05,0.1,0.2,0.5,1) ticks = xrel(sort(c(range(dens_all$x),
# log(lab))))
# text(0,relative_rangeY[1],expression(speciation~rates~'('~My^'-1'~')')) for
# (i in 2:(1+length(lab))){ lines(ticks[c(i,i)], yrel(c(0,1), from =
# axis_y-0.015, to = axis_y + 0.015), lwd = 1.5) text(labels = lab[i-1], x =
# ticks[i], y = yrel(c(0,1), from = axis_y-0.06, to = axis_y + 0.015)[1]) }


#### Second measure ####

relative_rangeX = c(0, 1)

xrel = function(x, rrX = relative_rangeX, from = 0, to = 1) {
    newx = from + (to - from) * (x - min(x))/diff(range(x))
    newx = newx * diff(rrX) + min(rrX)
    return(newx)
}

yrel = function(x, rrX = relative_rangeY, from = 0, to = 1, My = ry[2], my = ry[1]) {
    newx = from + (to - from) * (x - my)/(My - my)
    newx = newx * diff(rrX) + min(rrX)
    return(newx)
}


dens_all = density(v2_transformed, na.rm = T)

# plot the density
densclads = lapply(clades_tips2, function(vect) {
    density(v2_transformed[vect[vect < 4065]], na.rm = T)
})
dens_all = density(v2_transformed, na.rm = T)
maxy = max(dens_all$y)
for (i in 1:length(densclads)) {
    maxy = max(max(densclads[[i]]$y), maxy)
}

newlower_y = xrel(c(0, lower_y, 1))[2]
for (i in 1:length(densclads)) {
    lines(y = yrel(c(range(dens_all$x), densclads[[i]]$x))[-c(1:2)], x = xrel(c(maxy,
        densclads[[i]]$y), from = lower_y)[-1], lwd = 2, col = grays[i])
}

polygon(y = yrel(dens_all$x)[c(1, 1:length(dens_all$x), length(dens_all$x))], x = c(newlower_y,
    xrel(c(maxy, dens_all$y), from = lower_y)[-1], newlower_y), border = NA, col = adjustcolor("gray80",
    alpha.f = 0.5))
lines(y = yrel(dens_all$x)[c(1, 1:length(dens_all$x), length(dens_all$x))], x = c(newlower_y,
    xrel(c(maxy, dens_all$y), from = lower_y)[-1], newlower_y), lwd = 3)

# add the color legend
axis_y = 0.15
#

c2 = col_v2
# rc1 = range(round(( (c(v1_transformed) - range_v1[1]) /
# (range_v1[2]-range_v1[1]))*(col1_nbins-1) )+col1_offset+1, na.rm = T)
col_x = yrel(range_v2[1] + diff(range_v2) * (0:length(c2))/length(c2))
# col_x = col_x[rc1[1]:min(length(c1),rc1[2])] c1 =
# c1[rc1[1]:min(length(c1),rc1[2])]
col_y = xrel(c(0, 1), from = axis_y + 0.005, to = lower_y - 0.05)
for (i in 1:length(c2)) {
    polygon(y = col_x[c(i, i + 1)][c(1, 1, 2, 2)], x = col_y[c(1, 2, 2, 1)], border = c2[i],
        col = c2[i])
}
polygon(y = c(relative_rangeY[1], col_x[1])[c(1, 1, 2, 2)], x = col_y[c(1, 2, 2,
    1)], border = c2[2], col = c2[2])
lx = length(c2)
polygon(y = c(relative_rangeY[2], col_x[lx])[c(1, 1, 2, 2)], x = col_y[c(1, 2, 2,
    1)], border = c2[lx], col = c2[lx])


#### axis ####

# add the axis
lines(y = relative_rangeY, x = c(axis_y, axis_y), lwd = 2)
lines(y = relative_rangeY, x = -c(axis_y, axis_y), lwd = 2)

lab = c(1e-04, 0.002, 0.005, 0.01, 0.02, 0.05, 0.1, 0.2)
ticks = yrel(sort(c(range(dens_all$x), log(lab + 0.005))))
# text(0,relative_rangeY[1],expression(speciation~rates~'('~My^'-1'~')'))
for (i in 2:(1 + length(lab))) {
    lines(y = ticks[c(i, i)], x = axis_y + c(-0.015, 0.015), lwd = 1.5)
    text(labels = lab[i - 1], y = ticks[i], x = 0)
    lines(y = ticks[c(i, i)], x = -axis_y + c(-0.015, 0.015), lwd = 1.5)

}

text(lab = expression(paste(theta["T"])), x = -0.2, y = 0.94, cex = 1)
text(lab = expression(paste(theta["W"])), x = 0.2, y = 0.94, cex = 1)
text(lab = "Genetic Diversity", x = 0, y = 1, cex = 1.1)

#### close pdf ####
if (save_pdf) dev.off()

Figure 1. Mammals MCC phylogeny from Upham et al. 2019, colored with branch-specific speciation rates estimated with ClaDS2. Outer circles reflect estimated original? genetic diversity, colored in red for \(\theta_{T}\) and blue for \(\theta_{W}\) (see scale on the top right corner). Insets show distributions of tip speciation rates (inner panel, analyses on the MCC tree) and genetic diversity (top right panel) for all mammals (in black) and for 14 clades (colored) with more than 20 species, on a log scale. ClaDS2 hyper-parameters from posterior distribution of trees and MCC tree are shown in Fig. S2, while mean tip speciation rates estimates are shown in Fig. S3 and S4.


Figure 2

#### label clades that were used for PGLS analyses
mGendivSpRate <- spRate %>%
    filter(!set %in% "treeMCC") %>%
    group_by(species) %>%
    dplyr::summarise(rate_mean = mean(tipRate, na.rm = TRUE), rate_sd = sd(tipRate,
        na.rm = TRUE), rate_se = rate_sd/sqrt(n()), rate_lower.ci = CI(tipRate, ci = 0.95)[1],
        rate_upper.ci = CI(tipRate, ci = 0.95)[3], clades = unique(clades)) %>%
    arrange(clades) %>%
    mutate(ord = seq(1, length(n)), species = forcats::fct_reorder(species, ord),
        clades = forcats::fct_relevel(case_when(!clades %in% unique(PGLSclade$clade) ~
            "Other", TRUE ~ clades), "Other", after = 14)) %>%
    inner_join(., select(gen.div, species, EstPi, EstTheta, Nind), by = "species")

tcol0 <- cladesSum %>%
    filter(clades %in% mGendivSpRate$clades) %>%
    bind_rows(., data.frame(clades = "Other", col = "gray90"))

tcol <- c("black", as.character(tcol0[, 3]))
names(tcol) <- c("All Mammals", as.character(tcol0[, 1]))

# q30 <- ggplot(mGendivSpRate, aes(y = EstPi, x = rate_mean)) +
# geom_errorbarh(aes(xmin = rate_lower.ci, xmax = rate_upper.ci), size = 0.5,
# #alpha = 0.5, color = 'gray70') + geom_point(size = 1.2, shape = 21, fill =
# 'white', #alpha = 0.5, color = 'gray70', stroke = 0.5) +
# scale_y_continuous(trans='log10') + scale_x_continuous(trans='log10') +
# geom_smooth(fill = 'blue', color = 'black', alpha = 0.2, linetype = 'dashed',
# method=lm, position = 'identity', size = 0.8, fullrange = FALSE) + theme_bw()
# + theme(plot.margin = unit(c(0,0,0,0), 'cm'), panel.grid.major =
# element_blank(), panel.grid.minor = element_blank()) + labs(y =
# expression(paste('Genetic diversity (',theta['T'],')')), x = NULL) +
# annotate(geom='text', x=0.028, y=0.0004, label=paste0('MCC PGLS:\nEstimate =
# ', round(mccpgls['EstPi','slope'],3), '\np-value = ',
# formatC(mccpgls['EstPi','pvalue'], format = 'E', digits = 3), '\nlambda = ',
# round(mccpgls['EstPi','lambda'],3), '\nR^2 = ',
# round(mccpgls['EstPi','R2resid'],3)), size = 3.6, hjust = 0)

pglsSet2 <- pglsSet
names(pglsSet2) <- levels(droplevels(filter(mGendivSpRate, !clades %in% "Other"))$clades)

mGendivSpRate2 <- mGendivSpRate
mGendivSpRate2$clades2 <- "Mammals"
mGendivSpRate3 <- filter(mGendivSpRate, !clades %in% "Other") %>%
    mutate(clades2 = clades) %>%
    bind_rows(mGendivSpRate2) %>%
    mutate(clades2 = fct_relevel(clades2, "Mammals", after = 0))

pglsSet2 <- c("All Mammals", pglsSet)
names(pglsSet2) <- levels(mGendivSpRate3$clades2)

q3 <- ggplot(data = mGendivSpRate3, aes(y = EstPi, x = rate_mean)) + geom_errorbarh(aes(xmin = rate_lower.ci,
    color = clades2, xmax = rate_upper.ci), alpha = 0.8, show.legend = F) + scale_y_continuous(trans = "log10") +
    scale_x_continuous(trans = "log10") + geom_smooth(aes(y = EstPi, x = rate_mean),
    fill = "blue", color = "black", alpha = 0.2, linetype = "dashed", method = lm,
    position = "identity", size = 0.5, fullrange = FALSE) + scale_colour_manual(values = tcol[-16]) +
    theme_bw() + theme(plot.margin = unit(c(0, 0, 0, 0), "cm"), panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(), panel.spacing.x = unit(0, "lines"), panel.spacing.y = unit(0,
        "lines"), strip.text = element_text(size = 10, margin = margin(0.1, 0, 0.1,
        0, "cm")), strip.placement.x = "inside") + labs(y = expression(paste("Genetic diversity (",
    theta["T"], ")")), x = bquote("Speciation rate " ~ (Myr^-1))) + facet_wrap(~clades2,
    ncol = 5, labeller = labeller(clades2 = pglsSet2))

# ggarrange(ggarrange(NULL,q30,NULL,ncol = 1, heights = c(0.2,1,0.2)),q3,
# ncol=1)

# q <- ggarrange(q30,q3, ncol=1)

q3

ggsave(paste0(fig_path, "Figure2.pdf"), q3, width = 9, height = 6)
ggsave(paste0(fig_path, "Figure2.png"), q3, width = 9, height = 6)

# ggsave('/Users/acas/Dropbox/Post-docs/Morlon_Lab/manuscript_projects_info/mammals_genDiv_SpRate/figures/PiVsRate.pdf',
# q3, width = 10, height = 8, useDingbats = FALSE )


# q3.5 <- ggplot(data = mGendivSpRate, aes(y = EstTheta, x = rate_mean)) +
# geom_errorbarh(aes(xmin = rate_lower.ci,xmax = rate_upper.ci,colour =
# clades), alpha = 0.4) + geom_point(aes(colour = clades), size = 0.5, alpha =
# 0.4) + scale_colour_manual(values = tcol, breaks = names(tcol), labels =
# c('All Mammals', pglsSet, 'Other')) + scale_y_continuous(trans='log10',
# limits = c(0.00015,0.85)) + scale_x_continuous(trans='log10') + #
# geom_vline(data = mGendivSpRate, # aes(xintercept = mean(rate_mean)),
# linetype = 2, colour = 'gray50') + # geom_hline(data = mGendivSpRate, #
# aes(yintercept = mean(EstTheta)), linetype = 2, colour = 'gray50') +
# stat_smooth(data = filter(mGendivSpRate, !clades %in% 'Other'), aes(y =
# EstTheta, x = rate_mean, colour = clades), geom='line', method=lm, position =
# 'identity', se = FALSE, fullrange = TRUE, show.legend = F, alpha = 0.6, size
# = 0.7) + geom_smooth(data = mGendivSpRate, aes(y = EstTheta, x = rate_mean,
# color = 'All Mammals'), method=lm, position = 'identity', fullrange = TRUE,
# se = F) + #ggtitle('Pi vs mean rates with CI from 100 posterior trees') +
# theme_bw() + theme(plot.margin = unit(c(0,0,0,0), 'cm'), panel.grid.major =
# element_blank(), panel.grid.minor = element_blank()) + labs(y =
# expression(paste(theta['W'])), x = bquote('Speciation rate '~(Myr^-1)),
# colour = 'Clades') + annotate(geom='text', x=0.015, y=0.0003,
# label=paste0('MCC PGLS:\nEstimate = ', round(mccpgls['EstTheta','slope'],3),
# '\np-value = ', formatC(mccpgls['EstTheta','pvalue'], format = 'E', digits =
# 3), '\nlambda = ', round(mccpgls['EstTheta','lambda'],3), '\nR^2 = ',
# round(mccpgls['EstTheta','R2resid'],3)), size = 2.5, hjust = 0) #q3.5 q33.5
# <- ggarrange(q3, q3.5, ncol = 1, nrow = 2, align = 'v', #widths = c(2.5, 1),
# heights = c(1, 2.5, 2.5), common.legend = T , legend = 'right') q33.5
# ggsave(paste0(fig_path,'Figure2.pdf'), q33.5, width = 7, height = 8,
# useDingbats = FALSE ) ggsave(paste0(fig_path,'Figure2.png'), q33.5, width =
# 7, height = 8)

Figure 2. Relationship between intraspecific genetic diversity (Tajima’s T) and speciation rate for all mammal species and for each of the 14 clades with at least 20 species. Speciation rates represented by the 95% confidence intervals from 100 posterior trees. Included OLS regression lines; axis are log scaled.


Figure 3

pglsResult <- bind_rows(PGLSglobal,PGLSclade) %>% 
  filter(!set %in% 'treeMCC', 
         Term %in% 'log(tipRate)', 
         analysis %in% 'EstPi') %>%
  rename(pvalue = Pr...t..) %>%
  mutate(.variable = ifelse(is.na(clade), 'All Mammals', clade),
         pvalueBin = ifelse(pvalue < 0.05, 'Signif.','Not signif.')) %>%
  select(clade, .variable, set, Estimate, pvalue, pvalueBin) 

BMLMglobalPi <- tibble()
for(g in names(BMLMglobal[['estPi']])[-101]){
  gl <- BMLMglobal[['estPi']][[g]] %>% 
    filter(.chain %in% 1) %>%
    select(-.value, -.variable, -r_clades, -.chain,-.iteration,-.draw) %>%
    rename(.value = b_logtipRate) %>%
    mutate(.variable = 'All Mammals') %>%
    distinct()
  
  cl <- BMLMglobal[['estPi']][[g]] %>% 
    filter(.chain %in% 1) %>%
    select(-.chain,-.iteration,-.draw,-r_clades, -b_logtipRate) %>%
    distinct()
  
  BMLMglobalPi <- bind_rows(BMLMglobalPi,bind_rows(gl,cl))
}

pp1 <- ggplot(filter(BMLMglobalPi, .variable %in% 'All Mammals'), 
              aes(x = .value, y = fct_rev(.variable))) +
  #stat_intervalh(.width = c(.50, .80, .95, .99)) + 
  geom_vline(aes(xintercept = 0), linetype= 2) +
  geom_halfeyeh(fill = "gray80", point_interval = median_hdi, 
                .width = .95, alpha = 0.8, scale = 0.7) + 
  theme_bw() + xlim(-2.27, 1.04)  +
  theme(axis.title.x=element_blank(),axis.title.y=element_blank(),
        axis.text.x=element_blank(), axis.ticks.x=element_blank(), 
        panel.grid.major = element_blank(), panel.grid.minor = element_blank(), 
        text = element_text(size=22)) +
  geom_point(data = filter(pglsResult, .variable %in% 'All Mammals'), 
             aes(x = Estimate, y =  fct_rev(.variable), 
                 colour = pvalueBin), alpha = 0.8, size = 1, 
             position = position_nudge(y = -0.15), show.legend = F) +
  scale_color_manual(values = c('red','gray80'))

pp2 <- ggplot(filter(BMLMglobalPi, !.variable %in% 'All Mammals'), 
              aes(x = .value, y = fct_rev(.variable))) +
  geom_vline(aes(xintercept = 0), linetype= 2) +
  geom_halfeyeh(fill = "gray80",
                point_interval = median_hdi, .width = .95, alpha = 0.8, scale = 0.7, show.legend = F ) +
  #scale_fill_manual(values = tcol[2:15]) + 
  scale_y_discrete(labels=rev(pglsSet)) +
  theme_bw() + xlim(-2.27, 1.04) + 
  theme(#axis.title.x=element_blank(),
    axis.title.y=element_blank(),
    axis.text.y = element_text(colour = rev(tcol[2:15])),
    #axis.text.x=element_blank(), axis.ticks.x=element_blank(), 
    panel.grid.major = element_blank(), 
    panel.grid.minor = element_blank(), 
    text = element_text(size=22)) +
  geom_point(data = filter(pglsResult, !.variable %in% 'All Mammals'), 
             aes(x = Estimate, y =  fct_rev(.variable), 
                 colour = pvalueBin), alpha = 0.8, size = 1, 
             position = position_nudge(y = -0.15), show.legend = F) +
  scale_colour_manual(values = c('gray80','red')) + 
  labs(x = "Slope Estimate")

p = ggarrange(pp1, pp2, nrow = 2,heights = c(0.3, 2), align = "v")
p

ggsave(paste0(fig_path,'Figure3.pdf'), p, height = 9, width = 9)
ggsave(paste0(fig_path,'Figure3.png'), p, height = 9, width = 9)

Figure 3. Slope and significance of the relationship between intraspecific genetic diversity and speciation rate for all mammals (top panel) and each of the 14 clades with at least 20 species (bottom panel). Results for Bayesian Multilevel Models (BMLM, gray density plots with median point and 95% confidence intervals) and for the Phylogenetic Generalized Least Squares (PGLS, points colored red if p-value < 0.05) analysis. Results for all genetic diversity measures and on the posterior distribution of trees are shown in Fig. S5.


Table 1

pglsResulttraitsMCC <- PGLSglobal_traits %>%
    filter(set %in% "treeMCC", !Term %in% "(Intercept)") %>%
    rename(p = Pr...t.., SE = Std..Error) %>%
    ungroup() %>%
    mutate(`p-value` = ifelse(p < 0.001, "<0.001", round(p, 4)), Term = recode(Term,
        `log(tipRate)` = !!"λ", `log(BodyMassKg_notInputed)` = "Body Mass", `log(geoArea_km2)` = "Range area",
        `log(mean_temp)` = "Mean temperature", `log(litter_or_clutch_size_n)` = "Litter size",
        `log(GenerationLength_d)` = "Generation length"), ana = "PGLS") %>%
    # select(analysis, Estimate, Term, SE, `p-value`, ana)
select(analysis, Estimate, Term, SE, p, ana)

BMLMglobal_traitsExt <- tibble()
for (g in names(BMLMglobal_traits)) {
    for (i in names(BMLMglobal_traits[[g]])) {
        BMLMglobal_traitsExt <- bind_rows(BMLMglobal_traitsExt, BMLMglobal_traits[[g]][[i]])
    }
}

BMLMglobal_traitsExt <- BMLMglobal_traitsExt %>%
    select(-.chain, -.iteration, -.draw) %>%
    rename(Term = .variable, Estimate = .value, analysis = model) %>%
    mutate(ana = "BMLM")

BMLMglobal_traitsExtMCC <- filter(BMLMglobal_traitsExt, set %in% "treeMCC") %>%
    group_by(Term, analysis) %>%
    median_qi(Estimate) %>%
    mutate(Term = recode(Term, b_logtipRate = !!"λ", b_logBodyMassKg_notInputed = "Body Mass",
        b_loggeoArea_km2 = "Range area", b_logmean_temp = "Mean temperature", b_loglitter_or_clutch_size_n = "Litter size",
        b_logGenerationLength_d = "Generation length"), ana = "BMLM", `95% CI` = paste0("[",
        round(.lower, 3), "; ", round(.upper, 3), "]")) %>%
    select(-(.lower:.interval))

BMLMPGLSmcc <- bind_rows(pglsResulttraitsMCC, BMLMglobal_traitsExtMCC) %>%
    mutate_if(is.numeric, round, 3) %>%
    pivot_wider(names_from = ana, values_from = c(Estimate, SE, p, `95% CI`)) %>%
    select(-c("SE_BMLM", "p_BMLM", "95% CI_PGLS")) %>%
    relocate(Estimate_BMLM, .before = "95% CI_BMLM") %>%
    pivot_wider(names_from = analysis, values_from = Estimate_PGLS:`95% CI_BMLM`)

BMLMPGLSmccRed <- BMLMPGLSmcc[, c(1, 3, 6, 9, 12, 15, 4, 7, 10, 13, 16, 2, 5, 8,
    11, 14)] %>%
    select(-starts_with("p"))

ps <- BMLMPGLSmcc %>%
    select(starts_with("p"))

typology <- tibble(col_keys = names(BMLMPGLSmccRed), name = names(BMLMPGLSmccRed)) %>%
    separate(name, sep = "_", into = c("stat", "analysis", "model"))

ft <- autofit(flextable(BMLMPGLSmccRed)) %>%
    set_header_df(mapping = typology[, c(1, 4, 3, 2)], key = "col_keys") %>%
    merge_h(part = "header") %>%
    merge_v(part = "header") %>%
    flextable::compose(i = 1, j = 2, part = "header", value = as_paragraph("θ",
        as_sub("T"), " ~ Traits")) %>%
    flextable::compose(i = 1, j = 6, part = "header", value = as_paragraph("λ",
        " ~ Traits")) %>%
    flextable::compose(i = 1, j = 10, part = "header", value = as_paragraph("θ",
        as_sub("T"), " ~ ", "λ", " + Traits")) %>%
    colformat_num(j = colnames(BMLMPGLSmccRed), na_str = "", digits = 3) %>%
    theme_booktabs(fontsize = 10) %>%
    fix_border_issues() %>%
    align_nottext_col(align = "center", header = TRUE) %>%
    vline(j = 3, border = officer::fp_border(), part = "all") %>%
    vline(j = 5, border = officer::fp_border(width = 2), part = "all") %>%
    vline(j = 7, border = officer::fp_border(), part = "all") %>%
    vline(j = 9, border = officer::fp_border(width = 2), part = "all") %>%
    vline(j = 11, border = officer::fp_border(), part = "all") %>%
    bold(j = 1, part = "all") %>%
    bold(j = 2, i = which(ps$p_PGLS_piTraits < 0.001), part = "body") %>%
    bold(j = 6, i = which(ps$p_PGLS_SpRateTraits < 0.001), part = "body") %>%
    bold(j = 10, i = which(ps$p_PGLS_piSpRateTraits < 0.001), part = "body")

ft
# save_as_image(ft, path = paste0(fig_path,'table1.png'))

Table 1. Correlations between genetic diversity (\(\theta_{T}\)), speciation rates (\(\lambda\)) and traits of interest; PGLS and BMLM analyses on the MCC tree. PGLS estimates in bold have a p-value < 0.05. Results on the posterior distribution of trees are shown in Fig. S6.


Figure 4

pglsResultMR <- PGLSglobal_mutRate %>% 
  filter(!analysis %in% c("cEstPiSpRate", "piMutRate"),
         !Term %in% '(Intercept)') %>%
  rename(pvalue = Pr...t..) %>%
  mutate(pvalueBin = ifelse(pvalue < 0.05, 'Signif.','Not signif.'),
         ana = 'PGLS') %>%
  select(analysis, set, Estimate, Term, pvalue, pvalueBin, ana) %>% data.frame()

pglsResultMR$analysis <- factor(pglsResultMR$analysis, 
                                labels = c(expression(paste(mu,' ~ ', lambda)),
                                           expression(paste(N["e"], ' ~ ', lambda))))

BMLMglobal_mutRateExt <- tibble()
for(g in names(BMLMglobal_mutRate)){
  for(i in names(BMLMglobal_mutRate[[g]])){
    BMLMglobal_mutRateExt <- bind_rows(BMLMglobal_mutRateExt, 
                                       BMLMglobal_mutRate[[g]][[i]])
  }
}

BMLMglobal_mutRateExt <- BMLMglobal_mutRateExt %>% 
  select(-.chain,-.iteration,-.draw) %>%
  rename(Term = .variable, Estimate = .value, analysis = model) %>%
  mutate(Term = case_when(Term %in% "b_logtipRate"  ~ "log(tipRate)",
                          Term %in% "b_logmutrate" ~ "log(mutrate)",
                          Term %in% "b_logNe" ~ "log(Ne)"),
         ana = 'BMLM') %>%
  data.frame()

BMLMglobal_mutRateExt$analysis <- factor(BMLMglobal_mutRateExt$analysis, 
                                         labels = c(expression(paste(mu,' ~ ', lambda)),
                                                    expression(paste(N["e"],' ~ ', lambda))))

sumBMLM <- BMLMglobal_mutRateExt %>%
  group_by(Term, analysis, set) %>%
  median_qi(Estimate) %>%
  rename(medianEstimate = Estimate)

dt <- filter(BMLMglobal_mutRateExt, !set %in% 'treeMCC', 
             analysis %in% c('paste(mu, " ~ ", lambda)',
                             'paste(N["e"], " ~ ", lambda)'))
blmlplot1 <- ggplot(dt) + 
  stat_halfeyeh(aes(x = Estimate, y =  Term), 
                point_interval = median_qi, 
                .width = .95, size = 3, alpha = 0.8) +
  geom_pointintervalh(data = filter(sumBMLM, set %in% 'treeMCC',
                                    analysis %in% c('paste(mu, " ~ ", lambda)',
                                                    'paste(N["e"], " ~ ", lambda)')),
                      aes(x = medianEstimate, y = Term,
                          xmin =.lower, xmax = .upper), shape = 17, size = 3, 
                      fatten_point = 3, position = position_nudge(y = -0.4)) +
  geom_point(data = filter(pglsResultMR, !set %in% 'treeMCC', 
                           analysis %in% c('paste(mu, " ~ ", lambda)',
                                           'paste(N["e"], " ~ ", lambda)')),
             aes(x = Estimate, y =  Term, color = pvalueBin),
             size = 2, show.legend = F, alpha = 0.4,
             position = position_nudge(y = -0.1)) +
  scale_colour_manual(values=c('gray80','red')) + 
  geom_point(data = filter(pglsResultMR, set %in% 'treeMCC', 
                           analysis %in% c('paste(mu, " ~ ", lambda)',
                                           'paste(N["e"], " ~ ", lambda)')),
             aes(x = Estimate, y =  Term, color = pvalueBin),
             size = 3, show.legend = F, alpha = 0.5, shape = 17,
             position = position_nudge(y = -0.5)) +
  geom_vline(aes(xintercept = 0), linetype= 2) +
  facet_wrap(~fct_rev(analysis), labeller = label_parsed, ncol = 1,
             strip.position = "right") +
  theme_bw() + xlim(-0.82,0.22) +
  labs(x = "Slope Estimate") +
  theme(axis.title.y=element_blank(), 
        axis.text.y =element_blank(), axis.ticks.y =element_blank(), 
        strip.text = element_text(size = 14),
        panel.grid.major = element_blank(),
        panel.grid.minor = element_blank())

mgendivSpRateMutRate <- gendivSpRateMutRate %>%
  filter(!set %in% 'treeMCC') %>%
  group_by(species) %>%
  dplyr::summarise(SpRate_mean = mean(tipRate, na.rm = TRUE), 
                   SpRate_lower.ci = CI(tipRate, ci = 0.95)[1],
                   SpRate_upper.ci = CI(tipRate, ci = 0.95)[3],
                   MutRate_mean = mean(mutRate, na.rm = TRUE), 
                   MutRate_lower.ci = CI(mutRate, ci = 0.95)[1],
                   MutRate_upper.ci = CI(mutRate, ci = 0.95)[3],
                   Ne_mean = mean(Ne, na.rm = TRUE), 
                   Ne_lower.ci = CI(Ne, ci = 0.95)[1],
                   Ne_upper.ci = CI(Ne, ci = 0.95)[3],
                   EstPi = mean(EstPi, na.rm = TRUE)) %>%
  drop_na()  %>%
  pivot_longer(cols = MutRate_mean:Ne_upper.ci,
               names_to = c("variable", "x"),
               names_sep = "_") %>%
  pivot_wider(names_from = "x",
              values_from = "value")

mgendivSpRateMutRate$variable <- factor(mgendivSpRateMutRate$variable, 
                                        labels = c(expression(paste("Mutation Rate - ", mu)), 
                                                   expression(paste(N["e"]))))

## mutation rate vs speciation rate
p1 = ggplot(mgendivSpRateMutRate, aes(y = mean, x = SpRate_mean)) +
  geom_errorbarh(aes(xmin = SpRate_lower.ci, xmax = SpRate_upper.ci), color = "gray70") +
  geom_errorbar(aes(ymin = lower.ci, ymax = upper.ci), color = "gray70") +
  geom_point(size = 0.9, color = "gray50") +
  geom_smooth(method=lm, position = "identity", color = 'black', se = F) + 
  scale_x_log10(breaks = breaks_log(n = 8), labels = label_number()) +
  scale_y_log10(labels = label_scientific(), breaks = breaks_log(n = 6)) +
  facet_wrap(fct_rev(variable) ~ .,  labeller = label_parsed, 
             scales = 'free_y', ncol = 1, switch = "y" ) + theme_bw() + 
  labs(y = "", x = expression(paste('Speciation rate - ', lambda))) +
  theme(axis.title.y=element_blank(), #axis.title.x=element_blank(), 
        strip.text = element_text(size = 14), 
        strip.placement = "outside", 
        strip.background.y = element_blank(), 
        panel.grid.major = element_blank(),
        panel.grid.minor = element_blank())


q4 = ggarrange(p1, blmlplot1,  ncol = 2) 
q4

ggsave(paste0(fig_path,'Figure4.pdf'), q4, height = 7, width = 10)
ggsave(paste0(fig_path,'Figure4.png'), q4, height = 7, width = 10)

Figure 4. Relationships between speciation rate and the theoretical components of \(\theta\): mutation rate (\(\mu\)) and effective population size (\(N_e\)). Plots with means (\(\mu\)) and (\(N_e\)) and 95% confidence intervals with a regression line and log scaled axis. BMLM results using rates from 100 posterior trees (all posterior distributions; circles) and MCC tree (point with confidence interval; triangles) plotted with median and 95% confidence intervals; with PGLS estimates plotted below and colored red when significant (p-value < 0.05).


Supplementary figures

Fig. S1. Number of individuals used to estimate genetic diversity + Original and mean of subsampled estimates of genetic diversity

gen.div0 <- read.delim(paste0(folder_path, 'genetic_diversity/GenDiv_subsample5Ind.txt')) %>%
  mutate(Nind5 = case_when(Nind == 5 ~ "5 ind.",
                           TRUE ~ ">5 ind."),
         subPi_mean = ifelse(is.na(subPi_mean), EstPi,
                             subPi_mean),
         subTheta_mean = ifelse(is.na(subTheta_mean), EstTheta,
                                subTheta_mean),
         outPi = ifelse(Nind == 5, "5 ind.",
                        as.character(outPi)),
         outPi = ifelse(EstPi == 0, "out",
                        as.character(outPi)),
         outTheta = ifelse(Nind == 5, "5 ind.", 
                           as.character(outTheta)),
         outTheta = ifelse(EstTheta == 0, "out",
                           as.character(outTheta)))

p = ggplot(gen.div0) + 
  geom_point(aes(y = EstPi, x = subPi_mean, 
                 alpha = fct_infreq(outPi), color = fct_infreq(outPi)), shape = 1) +
  scale_x_log10() + scale_y_log10() + theme_bw() +
  labs(y = expression(paste("Original ", theta["T"])), 
       x = expression(paste("Mean subsampled ", theta['T']))) + 
  scale_color_manual(values = c('black',"#71D692","#805C31"),
                     labels = c("Included", "With 5 ind.",
                                "Excluded"), 
                     name = expression(paste("Included species from ", theta['T']))) +
  scale_alpha_manual(values = c(0.1, 0.4, 1),
                     labels = c("Included", "With 5 ind.",
                                "Excluded"), 
                     name = expression(paste("Included species from ", theta['T']))) +
  theme(legend.title = element_text(color = col_v1[95]))


t = ggplot(gen.div0) + 
  geom_point(aes(y = EstTheta, x = subTheta_mean, 
                 alpha = fct_infreq(outTheta), 
                 color = fct_infreq(outTheta)), shape = 1) +
  scale_x_log10() + scale_y_log10() + theme_bw() +
  labs(y = expression(paste("Original ", theta[W])), x = expression(paste("Mean subsampled ", theta[W]))) + 
  scale_color_manual(values = c('black',"#71D692","#805C31"), ##D14D36
                     labels = c("Included", 
                                "With 5 ind.",
                                "Excluded"), 
                     name = expression(paste("Included species from ", theta['W']))) +
  scale_alpha_manual(values = c(0.1, 0.4, 1),
                     labels = c("Included", 
                                "With 5 ind.",
                                "Excluded"), 
                     name = expression(paste("Included species from ", theta['W']))) +
  theme(legend.title = element_text(color = col_v2[90]))
n1 <- ggplot(gen.div0, aes(Nind)) + geom_histogram(aes(y = ..density..), bins = 31,
    fill = "gray70", colour = "white") + geom_density(fill = "white", alpha = 0.5) +
    scale_x_log10() + # geom_vline(aes(xintercept = mean(Nind))) +   geom_vline(aes(xintercept = median(Nind)), linetype = 'longdash') +  scale_x_log10()
    scale_x_log10() + # geom_vline(aes(xintercept = mean(Nind))) +   geom_vline(aes(xintercept = median(Nind)), linetype = 'longdash') +  +
    scale_x_log10() + # geom_vline(aes(xintercept = mean(Nind))) +   geom_vline(aes(xintercept = median(Nind)), linetype = 'longdash') +  #
    scale_x_log10() + # geom_vline(aes(xintercept = mean(Nind))) +   geom_vline(aes(xintercept = median(Nind)), linetype = 'longdash') +  geom_vline(aes(xintercept
    scale_x_log10() + # geom_vline(aes(xintercept = mean(Nind))) +   geom_vline(aes(xintercept = median(Nind)), linetype = 'longdash') +  =
    scale_x_log10() + # geom_vline(aes(xintercept = mean(Nind))) +   geom_vline(aes(xintercept = median(Nind)), linetype = 'longdash') +  mean(Nind)))
    scale_x_log10() + # geom_vline(aes(xintercept = mean(Nind))) +   geom_vline(aes(xintercept = median(Nind)), linetype = 'longdash') +  +
    scale_x_log10() + # geom_vline(aes(xintercept = mean(Nind))) +   geom_vline(aes(xintercept = median(Nind)), linetype = 'longdash') +  geom_vline(aes(xintercept
    scale_x_log10() + # geom_vline(aes(xintercept = mean(Nind))) +   geom_vline(aes(xintercept = median(Nind)), linetype = 'longdash') +  =
    scale_x_log10() + # geom_vline(aes(xintercept = mean(Nind))) +   geom_vline(aes(xintercept = median(Nind)), linetype = 'longdash') +  median(Nind)),
    scale_x_log10() + # geom_vline(aes(xintercept = mean(Nind))) +   geom_vline(aes(xintercept = median(Nind)), linetype = 'longdash') +  linetype
    scale_x_log10() + # geom_vline(aes(xintercept = mean(Nind))) +   geom_vline(aes(xintercept = median(Nind)), linetype = 'longdash') +  =
    scale_x_log10() + # geom_vline(aes(xintercept = mean(Nind))) +   geom_vline(aes(xintercept = median(Nind)), linetype = 'longdash') +  'longdash')
    scale_x_log10() + # geom_vline(aes(xintercept = mean(Nind))) +   geom_vline(aes(xintercept = median(Nind)), linetype = 'longdash') +  +
theme_bw() + labs(y = "Counts", x = "Number of individuals") + scale_y_continuous(breaks = c(0,
    0.25, 0.5, 0.75, 1), labels = function(x) sprintf("%.3f", x)) + theme(panel.grid = element_blank())

dt <- gen.div0 %>%
    select(Nind, species, EstPi, EstTheta) %>%
    pivot_longer(cols = EstPi:EstTheta)

n2 <- ggplot(dt, aes(x = Nind)) + geom_point(aes(y = value, colour = name), alpha = 0.4) +
    scale_color_manual(values = c(col_v1[95], col_v2[90]), labels = c(expression(paste("Estimated" ~
        theta["T"])), expression(paste("Estimated" ~ theta["W"]))), name = "Genetic diversity") +
    # geom_vline(aes(xintercept = mean(Nind))) + geom_vline(aes(xintercept =
    # median(Nind)), linetype = 'longdash') +
theme_bw() + labs(y = "Genetic diversity", x = "Number of individuals") + theme(legend.position = "top",
    panel.grid = element_blank()) + scale_y_continuous(trans = "log10", breaks = c(0,
    0.001, 0.005, 0.01, 0.05, 0.1)) + scale_x_continuous(trans = "log10", breaks = c(5,
    10, 20, 50, 100, 500, 1000))

npnt <- ggarrange(n1, p, n2, t, ncol = 2, nrow = 2, align = "hv", widths = c(0.75,
    1.1, 0.75, 1.1), labels = c("A", "C", "B", "D"))
npnt

ggsave(paste0(fig_path, "FigS1.pdf"), npnt, width = 10, height = 5, useDingbats = FALSE)
ggsave(paste0(fig_path, "FigS1.png"), npnt, width = 10, height = 5)

Fig. S1. Number of sequences with estimated genetic diversities obtained per species (A and B) and original vs mean subsampled genetic diversity estimates (C and D). All species with exactly five individuals were included (green in C and D). Species with genetic diversity above zero for which the estimated \(\theta\) is outside the range of \(\theta\)s 1000 subsampled replicates to five individuals were excluded (brown in C and D) for analyses.


Fig. S2. Genetic diversity and speciation rates per clade

dt <- mGendivSpRate %>%
    filter(!clades %in% "Other") %>%
    select(species, rate_mean, EstPi, EstTheta, clades) %>%
    pivot_longer(cols = c(EstPi:EstTheta))

tr = ggplot(dt) + geom_boxplot(aes(y = value, color = name, x = clades), alpha = 0.2) +
    scale_y_log10() + theme_bw() + scale_fill_manual(values = tcol[2:15]) + scale_color_manual(values = c(col_v1[95],
    col_v2[90]), labels = c(expression(paste("Estimated" ~ theta["T"])), expression(paste("Estimated" ~
    theta["W"])), "rate"), name = "Genetic diversity") + theme(axis.text.x = element_blank(),
    axis.ticks = element_blank(), panel.grid = element_blank(), legend.position = "top",
    plot.margin = unit(c(1, 1, -0.1, 1), "cm")) + guides(fill = FALSE) + labs(y = expression(theta),
    x = "")

r = ggplot(dt) + geom_boxplot(aes(y = rate_mean, x = clades), alpha = 0.2) + scale_y_log10() +
    theme_bw() + theme(axis.text.x = element_text(angle = 25, hjust = 1), panel.grid = element_blank(),
    plot.margin = unit(c(-0.1, 1, 1, 1), "cm")) + labs(y = expression(paste("Mean ",
    lambda)), x = "") + scale_x_discrete(labels = pglsSet)

q = ggarrange(tr, r, ncol = 1, align = "v")
q

ggsave(paste0(fig_path, "FigS2.pdf"), q, width = 10, height = 6, useDingbats = FALSE)
ggsave(paste0(fig_path, "FigS2.png"), q, width = 10, height = 6)

Fig. S2. Genetic diversity (\(\theta\)) and tip speciation rates (\(\lambda\), species mean rates from 100 posterior trees) for the 14 clades with at least 20 species.


Fig. S3. Distribution of tip speciation rates

meanTipRateClade <- gendivSpRateMutRate %>%
    filter(!set %in% "treeMCC") %>%
    group_by(species) %>%
    dplyr::summarise(SpRate_mean = mean(tipRate, na.rm = TRUE)) %>%
    left_join(., clades, by = "species")

p <- ggplot() + stat_density(data = filter(meanTipRateClade, clades %in% tcol0$clades[-15]),
    aes(SpRate_mean, colour = clades), geom = "line", position = "identity", size = 0.5,
    show.legend = T) + scale_colour_manual(values = tcol[2:15], labels = pglsSet) +
    labs(x = bquote("Mean tip speciation rates " ~ (Myr^-1)), y = "Density", colour = "Clades") +
    geom_density(data = meanTipRateClade, aes(x = SpRate_mean), fill = NA, size = 1) +
    geom_vline(data = meanTipRateClade, aes(xintercept = mean(SpRate_mean)), linetype = 2,
        colour = "gray50") + theme_bw() + theme(panel.background = element_rect(fill = "#ffffff"),
    panel.grid = element_blank(), legend.position = "bottom") + scale_x_continuous(trans = "log10")

p

ggsave(paste0(fig_path, "FigS3.pdf"), p, width = 8, height = 5, useDingbats = FALSE)
ggsave(paste0(fig_path, "FigS3.png"), p, width = 8, height = 5)

Fig. S3. Global (black) distribution of species-specific speciation rates estimated from 100 posterior trees and for 14 clades (colored) with more than 20 species for both speciation rates and genetic diversity data.


Fig. S4. PGLS results with R2 across 100 posterior trees and MCC tree

library(nlme)
library(tidyverse)
library(rr2)  ## Ives et al. 2018 R2s
library(caper)

pglsList <- list.files(paste0(folder_path, "pgls/global/output"))

for (pgs in pglsList) {
    pgs <- pglsList[repi]
    tree <- word(tools::file_path_sans_ext(pgs), sep = "_", 4, 4)

    pglspirateMCC <- readRDS(paste0(folder_path, "pgls/global/output/", pgs))$EstPi

    dtset <- pglspirateMCC$data$data %>%
        rownames_to_column("species")

    fit1 <- gls(log(EstPi) ~ log(tipRate), data = dtset, correlation = corPagel(1,
        phy = pglspirateMCC$data$phy, form = ~species), method = "ML")

    r2 <- R2(mod = fit1, pred = FALSE)  ## pred takes a lot of time and I don't think it makes sense with something that is so badly predicted by the model, the focus is more in just showing the range of correlation across posterior trees, we already know the correlation strength is really weak.

    saveRDS(c(fit1, r2), paste0(folder_path, "pgls/global/outputR2_rr2/", tree, ".rds"))
}

####
pglsList <- list.files(paste0(folder_path, "pgls/global/outputR2_rr2"))
dtR2 <- data.frame()
for (tree in pglsList) {
    Tree <- tools::file_path_sans_ext(tree)

    fit1 <- readRDS(paste0(folder_path, "pgls/global/outputR2_rr2/", tree))
    class(fit1) <- "gls"
    dtR2 <- bind_rows(dtR2, cbind(Tree, as.data.frame(summary(fit1)$tTable)[2, c(1,
        4)], as.character(summary(fit1)$modelStruct), fit1$R2_lik, fit1$R2_resid))

}

colnames(dtR2) <- c("set", "Estimate", "pvalue", "lambda", "R2_lik", "R2_resid")
saveRDS(dtR2, paste0(folder_path, "pgls/global/rr2Output.rds"))
### make boxplot with each panel for slope estimate, p-value, lambda and
### R2resid

# Also shown is the PGLS slope estimate, significance, lambda transformation
# and explained variance for the consensus tree (R2resid; estimated as Ives
# 2019).

dtR2 <- readRDS(paste0(folder_path, "pgls/global/rr2Output.rds")) %>%
    mutate(lambda = as.numeric(lambda)) %>%
    pivot_longer(cols = -set)

dtR2$name <- factor(dtR2$name, levels = c("Estimate", "pvalue", "lambda", "R2_resid",
    "R2_lik"))

p <- ggplot(filter(dtR2, !set %in% "treeMCC"), aes(x = name, y = value)) + geom_boxplot(outlier.shape = NA) +
    geom_jitter(alpha = 0.4, size = 0.7) + geom_point(data = filter(dtR2, set %in%
    "treeMCC"), aes(x = name, y = value), color = "red", size = 1.2) + facet_wrap(~name,
    scales = "free", nrow = 1) + ggh4x::facetted_pos_scales(y = list(name == "pvalue" ~
    scale_y_log10())) + theme_bw() + theme(strip.background = element_blank(), strip.text.x = element_blank()) +
    labs(x = "", y = expression(paste(theta["T"], "~", lambda, " PGLS output"))) +
    scale_x_discrete(breaks = levels(dtR2$name), labels = c("Slope estimate", "p-value",
        expression(paste("Pagel's ", lambda)), expression(paste("R"["resid"]^"2")),
        expression(paste("R"["lik"]^"2"))))
p

ggsave(paste0(fig_path, "FigS4.pdf"), p, width = 8, height = 5, useDingbats = FALSE)
ggsave(paste0(fig_path, "FigS4.png"), p, width = 8, height = 5)

Fig. S4. Output from PGLS analysis using \(\theta_{T}\) as response variable with \(R^2\) estimates from Ives 2019, across 100 posterior trees (black) and MCC tree (red).


Fig. S5. Comparing models using different \(\theta\) estimates

###prepare PGLS plotting
pgls2 <- bind_rows(PGLSglobal, PGLSclade) %>%
  filter(Term %in% 'log(tipRate)') %>%
  select(c('clade','set','analysis','Estimate','Pr...t..')) %>%
  mutate(colour = case_when(is.na(clade) ~ 'All Mammals',
                            TRUE ~ 'clades'),
         size = case_when(set %in% 'treeMCC' ~ 'MCC',
                          TRUE ~ 'Posterior'),
         pvalue = ifelse(Pr...t.. < 0.05, "sig.", "not sig."),
         shape = paste0(analysis,"_", pvalue),
         analysis = fct_relevel(analysis,"EstPi", "subPi", "EstTheta","subtheta"),
         clade = ifelse(is.na(clade), 'ALL', as.character(clade))) %>%
  arrange(analysis)

sig <- c('black','white')
ppgls <- ggplot(pgls2) +
  geom_point(aes(y = Estimate, x = clade, 
                 group = analysis, 
                 size = fct_infreq(size), 
                 colour = fct_inorder(shape),
                 shape = fct_rev(pvalue),
                 fill = fct_inorder(shape)
                 ), 
             stroke = 0.5, #alpha = 0.2,
             position = position_dodge(width = 0.65)) +
    geom_vline(xintercept = 1.5, size = 0.2, linetype = 'dashed') +
  geom_hline(yintercept = 0, size = 0.2) + 
  theme_bw() +
    scale_shape_manual(name = 'PGLS sig.',
                     values = c(23, 21), 
                     labels = c("< 0.05", "> 0.05"),
                      guide = guide_legend(override.aes =
                                      list(color = c("black","gray90"),
                                           alpha = 1), nrow=2)
                     ) +
    scale_colour_manual(name = 'PGLS sig.',
                      values = rep(sig,4),
                      guide = "none"
                      #breaks = unique(pgls2$shape)[1:2],
                      # labels = c("< 0.05", "> 0.05"),
                      # guide = guide_legend(override.aes =
                      #                 list(color = c("gray90","black"),
                      #                      alpha = 1), nrow=2)
                      ) +
  scale_fill_manual(name = 'Genetic diversity',
                    values = c(col_v1[98], col_v1[98],
                               col_v1[70], col_v1[70],
                               col_v2[98], col_v2[98],
                               col_v2[70], col_v2[70]),
                    labels = c(expression(paste("Original" ~ theta['T'])),
                               expression(paste("Subsampled" ~ theta['T'])),
                               expression(paste("Original" ~ theta['W'])),
                               expression(paste("Subsampled" ~ theta['W']))),
                   guide = guide_legend(override.aes =
                                      list(color = c(col_v1[98], col_v1[70],
                                                    col_v2[98], col_v2[70],
                                           NA,NA,NA,NA),
                                           alpha = 1), nrow=2, order = 1)
                   ) +
  scale_size_manual(name = 'Tree set',
                    values = c(1.5,4),
                    labels = c("Posterior trees","MCC tree"),
                     guide = guide_legend(nrow=2)) +
  scale_x_discrete(labels=c("All Mammals", pglsSet)) +
  theme(legend.position ="top",
        plot.margin = unit(c(0,0,0,0), "cm"),
                  legend.spacing.y = unit(0.1, 'cm'),
        panel.grid.major = element_blank(),
        panel.grid.minor = element_blank(),
        axis.ticks.x = element_blank(),
        axis.text.x = element_blank()) +
  labs(y = "PGLS estimates", x = "") + 
  scale_y_continuous(limits = c(-1.8,0.65)) 
## prepare BMLM plotting

BMLMglobalsum <- tibble()
for (i in names(BMLMglobal)) {
    for (g in names(BMLMglobal[[i]])) {
        gl <- BMLMglobal[[i]][[g]] %>%
            group_by(model, set) %>%
            median_qi(b_logtipRate) %>%
            rename(.value = b_logtipRate) %>%
            mutate(.variable = "All Mammals")

        cl <- BMLMglobal[[i]][[g]] %>%
            group_by(.variable, model, set) %>%
            median_qi(.value)
        BMLMglobalsum <- bind_rows(BMLMglobalsum, bind_rows(gl, cl))
    }
}

BMLMglobalsum1 <- BMLMglobalsum %>%
    filter(.variable %in% "All Mammals") %>%
    select(-.width, -.point, -.interval, -.variable) %>%
    mutate(colour = fct_relevel(case_when(set %in% "treeMCC" ~ "MCC", TRUE ~ "Posterior"),
        "Posterior", "MCC")) %>%
    pivot_wider(names_from = model, values_from = c(.value, .lower, .upper)) %>%
    arrange(colour)

BMLMglobalsum2 <- BMLMglobalsum %>%
    select(-.width, -.point, -.interval) %>%
    mutate(colour = case_when(.variable %in% "All Mammals" ~ "All Mammals", TRUE ~
        "clades"), size = case_when(set %in% "treeMCC" ~ "MCC", TRUE ~ "Posterior"),
        analysis = fct_relevel(model, "estPi", "subPi", "estTheta", "subTheta"),
        clade = ifelse(.variable %in% "All Mammals", "ALL", as.character(.variable))) %>%
    arrange(analysis)
boldSet <- c("All Mammals", pglsSet)

bold.labels <- ifelse(boldSet %in% c("All Mammals", "Eulipotyphla", "Guinea Pig-related",
    "Lagomorpha", "Muridae", "Squirrel-related", "Yinpterochiroptera"), yes = "bold",
    no = "plain")

pbmlm <- ggplot() + geom_pointinterval(data = filter(BMLMglobalsum2, set %in% "treeMCC"),
    aes(y = .value, x = clade, ymin = .lower, ymax = .upper, color = analysis, group = analysis),
    alpha = 0.8, point_alpha = 0.8, point_size = 4, size = 4, position = position_dodge(width = 0.85)) +
    geom_pointinterval(data = filter(BMLMglobalsum2, !set %in% "treeMCC"), aes(y = .value,
        x = clade, ymin = .lower, ymax = .upper, group = analysis, colour = analysis),
        alpha = 0.07, point_alpha = 0.1, point_size = 0.7, size = 0.7, position = position_jitterdodge(jitter.width = 0.65,
            dodge.width = 0.85)) + geom_vline(xintercept = 1.5, size = 0.2, linetype = "dashed") +
    geom_hline(yintercept = 0, size = 0.2) + scale_color_manual(name = "Genetic diversity",
    values = c(col_v1[98], col_v1[70], col_v2[98], col_v2[70]), guide = "none") +
    scale_x_discrete(labels = boldSet) + theme_bw() + theme(legend.position = "bottom",
    plot.margin = unit(c(0, 0, 0, 0), "cm"), panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(), axis.text.x = element_text(angle = 30, hjust = 1,
        face = bold.labels)) + labs(y = "BMLM estimates", x = "") + scale_y_continuous(limits = c(-1.8,
    0.65))

qs4 = ggpubr::ggarrange(ppgls, pbmlm, ncol = 1, nrow = 2, common.legend = T, legend = "bottom",
    align = "v", heights = c(0.8, 1))
qs4

ggsave(paste0(fig_path, "FigS5.pdf"), qs4, width = 9, height = 6, useDingbats = FALSE)
ggsave(paste0(fig_path, "FigS5.png"), qs4, width = 9, height = 6)

Fig. S5. Estimates from PGLS (top) and BMLM (bottom) analyses of the slope of the relationship between genetic diversity and speciation rates across all mammals (on the left) and each of the 14 clades with at least 20 species (on the right). Analyses were performed using either original estimates of \(\theta_{T}\) and \(\theta_{W}\) or mean estimates over 1000 subsampled replicates, and for the MCC tree as well as 100 posterior trees.


Fig. S6. Relationship with different thresholds of sequences per species

load(paste0(folder_path, "speciation_rate/output/upham_4064sp_FR_MCCposterior100.rdata"))
gendivSpRateN <- spRate %>%
    left_join(., select(gen.div, species, EstPi, Nind), by = "species")

DataSubsetMCC10 <- gendivSpRateN %>%
    filter(set %in% "treeMCC", Nind > 9)

Nspecies10 <- DataSubsetMCC10 %>%
    group_by(clades) %>%
    dplyr::summarise(NPisp10 = n())

treeMCC <- TreeSet[["treeMCC"]]

TreeSubset <- drop.tip(treeMCC, as.character(treeMCC$tip.label[which(!treeMCC$tip.label %in%
    DataSubsetMCC10$species)]))

datacomp <- comparative.data(data = DataSubsetMCC10, phy = TreeSubset, names.col = "species",
    vcv = TRUE, na.omit = TRUE, warn.dropped = TRUE)

# modpi10 <- pgls(log(EstPi) ~ log(tipRate), data = datacomp, lambda = 'ML')
# saveRDS(modpi10, paste0(folder_path,'pgls/extra/modpi10.rds'))
modpi10 <- readRDS(paste0(folder_path, "pgls/extra/modpi10.rds"))
# summary(modpi10)

#####

DataSubsetMCC20 <- gendivSpRateN %>%
    filter(set %in% "treeMCC", Nind > 19)

Nspecies20 <- DataSubsetMCC20 %>%
    group_by(clades) %>%
    dplyr::summarise(NPisp20 = n())

treeMCC <- TreeSet[["treeMCC"]]

TreeSubset <- drop.tip(treeMCC, as.character(treeMCC$tip.label[which(!treeMCC$tip.label %in%
    DataSubsetMCC20$species)]))

datacomp <- comparative.data(data = DataSubsetMCC20, phy = TreeSubset, names.col = "species",
    vcv = TRUE, na.omit = TRUE, warn.dropped = TRUE)

# modpi20 <- pgls(log(EstPi) ~ log(tipRate), data = datacomp, lambda = 'ML')
# saveRDS(modpi20, paste0(folder_path,'pgls/extra/modpi20.rds'))
modpi20 <- readRDS(paste0(folder_path, "pgls/extra/modpi20.rds"))
# summary(modpi20)

#####

DataSubsetMCC50 <- gendivSpRateN %>%
    filter(set %in% "treeMCC", Nind > 49)

Nspecies50 <- DataSubsetMCC50 %>%
    group_by(clades) %>%
    dplyr::summarise(NPisp50 = n())

treeMCC <- TreeSet[["treeMCC"]]

TreeSubset <- drop.tip(treeMCC, as.character(treeMCC$tip.label[which(!treeMCC$tip.label %in%
    DataSubsetMCC50$species)]))

datacomp <- comparative.data(data = DataSubsetMCC50, phy = TreeSubset, names.col = "species",
    vcv = TRUE, na.omit = TRUE, warn.dropped = TRUE)

# modpi50 <- pgls(log(EstPi) ~ log(tipRate), data = datacomp, lambda = 'ML')
# saveRDS(modpi50, paste0(folder_path,'pgls/extra/modpi50.rds'))
modpi50 <- readRDS(paste0(folder_path, "pgls/extra/modpi50.rds"))
# summary(modpi50)

#####

DataSubsetMCC75 <- gendivSpRateN %>%
    filter(set %in% "treeMCC", Nind > 74)

Nspecies75 <- DataSubsetMCC75 %>%
    group_by(clades) %>%
    dplyr::summarise(NPisp75 = n())

treeMCC <- TreeSet[["treeMCC"]]

TreeSubset <- drop.tip(treeMCC, as.character(treeMCC$tip.label[which(!treeMCC$tip.label %in%
    DataSubsetMCC75$species)]))

datacomp <- comparative.data(data = DataSubsetMCC75, phy = TreeSubset, names.col = "species",
    vcv = TRUE, na.omit = TRUE, warn.dropped = TRUE)

# modpi75 <- pgls(log(EstPi) ~ log(tipRate), data = datacomp, lambda = 'ML')
# saveRDS(modpi75, paste0(folder_path,'pgls/extra/modpi75.rds'))
modpi75 <- readRDS(paste0(folder_path, "pgls/extra/modpi75.rds"))
# summary(modpi75)

#####

DataSubsetMCC100 <- gendivSpRateN %>%
    filter(set %in% "treeMCC", Nind > 99)

Nspecies100 <- DataSubsetMCC100 %>%
    group_by(clades) %>%
    dplyr::summarise(NPisp100 = n())

treeMCC <- TreeSet[["treeMCC"]]

TreeSubset <- drop.tip(treeMCC, as.character(treeMCC$tip.label[which(!treeMCC$tip.label %in%
    DataSubsetMCC100$species)]))

datacomp <- comparative.data(data = DataSubsetMCC100, phy = TreeSubset, names.col = "species",
    vcv = TRUE, na.omit = TRUE, warn.dropped = TRUE)

# modpi100 <- pgls(log(EstPi) ~ log(tipRate), data = datacomp, lambda = 'ML')
# saveRDS(modpi100, paste0(folder_path,'pgls/extra/modpi100.rds'))
modpi100 <- readRDS(paste0(folder_path, "pgls/extra/modpi100.rds"))
# summary(modpi100)
#modpirateMCC <- readRDS(paste0(folder_path,'pgls/extra/pglsPispRateMCC.rds'))
Nspecies5 <- gendivSpRate %>%
  filter(set %in% 'treeMCC') %>%
  group_by(clades) %>%
  dplyr::summarise(Nsp = n(),
                   NPisp5 = sum(!is.na(EstPi)))

NspeciesSF <- left_join(Nspecies5,Nspecies10) %>%
  left_join(Nspecies20) %>%
  left_join(Nspecies50) %>%
  left_join(Nspecies75) %>%
  left_join(Nspecies100) %>%
  pivot_longer(cols=-c(clades,Nsp)) %>%
  mutate(SF = value/Nsp) %>%
  filter(clades %in% unique(PGLSclade$clade))
  
SFPlot <- ggplot(NspeciesSF, 
                 aes(x = fct_inorder(str_replace(name, "NPisp","")),
                     y = SF)) +
  geom_boxplot() +
  geom_point(aes(color = clades)) +
  scale_colour_manual(values = tcol[2:15], 
                      labels = pglsSet) +
  theme_bw() +
  labs(y = "Sampling fraction of available species\ngiven a threshold",
       x = "Threshold for min. number of ind. per alignment") +
  theme(legend.position = 'bottom')

NspThres <- NspeciesSF %>%
  dplyr::group_by(name) %>%
  dplyr::summarize(Nsp = sum(value, na.rm = T)) %>%
  rename(thres = name) 

pglsSum <- PGLSglobal %>%
  filter(set %in% 'treeMCC', 
         analysis %in% 'EstPi', 
         Term %in% "log(tipRate)") %>%
  select(Estimate, Pr...t..) %>% 
  mutate(thres = 'NPisp5') %>%
  bind_rows(.,data.frame(thres = 'NPisp10', t(summary(modpi10)$coefficients[2,c(1,4)])),
                     data.frame(thres = 'NPisp20', t(summary(modpi20)$coefficients[2,c(1,4)])),
                     data.frame(thres = 'NPisp50', t(summary(modpi50)$coefficients[2,c(1,4)])),
                     data.frame(thres = 'NPisp75', t(summary(modpi75)$coefficients[2,c(1,4)])),
                     data.frame(thres = 'NPisp100', t(summary(modpi100)$coefficients[2,c(1,4)]))) %>%
  left_join(NspThres)

pglsThres <- ggplot(pglsSum, aes(x = Nsp, y = Estimate)) +
  geom_point(aes(color = Pr...t..)) +
  ggrepel::geom_label_repel(aes(label = str_replace(thres, "NPisp",""))) +
  theme_bw()+
  scale_x_continuous(breaks=pglsSum$Nsp) +
  scale_color_gradient(name = "P-value", #trans = "log",
                       breaks = c(1e-10, 0.05), 
                       labels = c(1e-10, 0.05)) +
  labs(x = "Number species in PGLS analysis for a given threshold", 
       y = "PGLS slope estimate") +
  theme(legend.position = 'bottom',
        panel.grid.minor = element_blank()) 
gendivThres <- ggplot(data =  filter(gendivSpRate, set %in% 'treeMCC'), 
             aes(y = EstPi, x = tipRate)) + 
  geom_point(size = 2, alpha = 0.1) + 
  scale_y_continuous(trans='log10',expand=c(0,0)) +
  scale_x_continuous(trans='log10',expand=c(0,0)) +
  stat_smooth(aes(color = '5'), method=lm, position = "identity", 
              size=0.7, se = FALSE, geom = 'line',  fullrange = TRUE) + 
  stat_smooth(data = DataSubsetMCC10, 
              aes(y = EstPi, x = tipRate, color = "10"), geom = 'line',
              linetype = 'dashed',  show_guide=TRUE,
              method=lm, se = FALSE, position = "identity", size=0.7, 
              fullrange = TRUE) + 
  stat_smooth(data = DataSubsetMCC20, 
              aes(y = EstPi, x = tipRate, color = "20"), geom = 'line',
              linetype = 'dashed',  show_guide=TRUE,
              method=lm, se = FALSE, position = "identity", size=0.7, 
              fullrange = TRUE) + 
   stat_smooth(data = DataSubsetMCC50, 
              aes(y = EstPi, x = tipRate, color = "50"), geom = 'line',
              linetype = 'dashed',  show_guide=TRUE,
              method=lm, se = FALSE, position = "identity", size=0.7, 
              fullrange = TRUE) + 
  stat_smooth(data = DataSubsetMCC75, 
              aes(y = EstPi, x = tipRate, color = "75"), geom = 'line',
              linetype = 'dashed',  show_guide=TRUE,
              method=lm, se = FALSE, position = "identity", size=0.7, 
              fullrange = TRUE) + 
    stat_smooth(data = DataSubsetMCC100, 
              aes(y = EstPi, x = tipRate, color = "100"), geom = 'line',
              linetype = 'dashed',  show_guide=TRUE,
              method=lm, se = FALSE, position = "identity", size=0.7, 
              fullrange = TRUE) + 
  theme_bw() + 
  theme(#plot.margin = unit(c(0,0,0,0), "cm"),
        legend.position = "right") + 
  labs(y = expression(paste(theta['T'])), 
       x = bquote('Speciation rate '~(Myr^-1))) + 
  scale_colour_manual(name="Threshold of number of\nindividuals per alignment",
                      values = c("5"="black","10"="lightskyblue",
                      "20"="olivedrab","50"="orange2",
                      "75"="red2","100"="purple"),
                      breaks = c("5","10","20","50","75","100"))

qjoint <- ggarrange(ggarrange(NULL, gendivThres, widths = c(0.2,1),
                              labels = c("","A")),
                    ggarrange(SFPlot, pglsThres, nrow = 1, align = 'hv',
                              labels = c("B","C")),
                    ncol = 1, 
                    heights = c(1,1.2), align = 'hv')
qjoint

ggsave(paste0(fig_path, 'FigS6.pdf'), qjoint, 
       width = 16, height = 10, useDingbats = FALSE )
ggsave(paste0(fig_path, 'FigS6.png'), qjoint, 
       width = 16, height = 10)

Fig. S6. Differences in threshold for number of individuals per species alignment. Slope of the OLS given six thresholds for minimum number of sequences (A), with continuous line for the used threshold across all analyses (5 individuals) and grey points for used species with speciation rates from MCC tree. The sampling fraction per clade for each threshold (B) was estimated relative to the total number of species in the DNA-only phylogeny. Relationship between PGLS slopes (MCC tree) and the number of species in the analysis (C) given the threshold of number of sequences per species alignment.


Fig. S7. Traits results from posterior trees dataset

pglsResulttraits <- PGLSglobal_traits %>% 
  filter(!Term %in% '(Intercept)') %>%
  rename(pvalue = Pr...t..) %>%
  mutate(pvalueBin = ifelse(pvalue < 0.05, 'Signif.','Not signif.'),
         Term = recode(Term, `log(tipRate)` = "Speciation rate", 
                       `log(BodyMassKg_notInputed)` = "Body Mass",
                       `log(geoArea_km2)` = "Range area",
                       `log(mean_temp)` = "Mean temperature",
                       `log(litter_or_clutch_size_n)` = "Litter size",
                       `log(GenerationLength_d)` = "Generation length")) %>%
  select(analysis, set, Estimate, Term, pvalue, pvalueBin) 

pglsResulttraits$Term <- factor(pglsResulttraits$Term,
                                levels = c("Speciation rate", "Body Mass", "Range area",
                                           "Mean temperature", "Litter size", 
                                           "Generation length"))

pglsResulttraits$analysis <- factor(pglsResulttraits$analysis, 
                                    levels = c("piTraits",
                                               "SpRateTraits",
                                               "piSpRateTraits"),
                                    labels = c(expression(paste(theta["T"]," ~ Traits")),
                                               expression(paste(lambda, " ~ Traits")),
                                               expression(paste(theta["T"],' ~ ', lambda, " + Traits"))))



pp1 <- ggplot(data = filter(pglsResulttraits, !set %in% 'treeMCC', 
                            analysis %in% 'paste(theta["T"], " ~ ", lambda, " + Traits")')) +
  geom_vline(aes(xintercept = 0), linetype= 2) +
  geom_point(aes(x = Estimate, y =  fct_rev(fct_inorder(Term)), color = pvalueBin), 
             alpha = 0.2, size = 2,  show.legend = F) +
  facet_wrap(~analysis, ncol = 1, drop = T, scales = 'free_y', labeller = label_parsed) + 
  theme_bw() +  labs(x = "Slope Estimate") +
  xlim(-0.46, 0.34)  + 
  scale_colour_manual(values=c('gray80','red')) + 
  theme(axis.title.y=element_blank(), #axis.title.x=element_blank(),
        #axis.text.x=element_blank(), #axis.ticks.x=element_blank(), 
        panel.grid.major = element_blank(), 
        panel.grid.minor = element_blank(), 
        text = element_text(size=15)) 

pp2 <- ggplot(data = droplevels(filter(pglsResulttraits, !set %in% 'treeMCC', 
                                       !analysis %in% 'paste(theta["T"], " ~ ", lambda, " + Traits")'))) +
  geom_vline(aes(xintercept = 0), linetype= 2) +
  geom_point(aes(x = Estimate, y =  fct_rev(fct_inorder(Term)), color = pvalueBin), 
             alpha = 0.2, size = 2, show.legend = F) +
  facet_wrap(~analysis, ncol = 1, drop = T, scales = 'free', labeller = label_parsed) +
  theme_bw() + xlim(-0.46, 0.34)  + 
  labs(title = 'PGLS') +
  scale_colour_manual(values=c('gray80','red')) + 
  theme(axis.title.y=element_blank(),  axis.title.x=element_blank(),
        axis.text.x=element_blank(), axis.ticks.x=element_blank(), 
        panel.grid.major = element_blank(), 
        panel.grid.minor = element_blank(), 
        text = element_text(size=15)) 

p = ggarrange(pp2, pp1, nrow = 2, heights = c(1.7, 1), align = "v")


BMLMglobal_traitsExtPos <- BMLMglobal_traitsExt %>%
  mutate(Term = recode(Term, b_logtipRate = "Speciation rate", 
                       b_logBodyMassKg_notInputed = "Body Mass",
                       b_loggeoArea_km2 = "Range area",
                       b_logmean_temp = "Mean temperature",
                       b_loglitter_or_clutch_size_n = "Litter size",
                       b_logGenerationLength_d = "Generation length"),
         ana = 'BMLM')

BMLMglobal_traitsExtPos$Term <- factor(BMLMglobal_traitsExtPos$Term,
                                       levels = c("Speciation rate", "Body Mass", "Range area",
                                                  "Mean temperature", "Litter size", 
                                                  "Generation length"))

BMLMglobal_traitsExtPos$analysis <- factor(BMLMglobal_traitsExtPos$analysis, 
                                           levels = c("piTraits",
                                                      "SpRateTraits",
                                                      "piSpRateTraits"),
                                           labels = c(expression(paste(theta["T"]," ~ Traits")),
                                                      expression(paste(lambda, " ~ Traits")),
                                                      expression(paste(theta["T"],' ~ ', lambda, " + Traits"))))

b1 <-  ggplot(data = filter(BMLMglobal_traitsExtPos, !set %in% 'treeMCC',
                            analysis %in% 'paste(theta["T"], " ~ ", lambda, " + Traits")')) +
  geom_vline(aes(xintercept = 0), linetype= 2) +
  geom_halfeyeh(aes(x = Estimate, y =  fct_rev(fct_inorder(Term))), 
                point_interval = median_qi, .width = .95, scale = 1, #size = 4,
                #position = position_nudge(y = 0.1), 
                alpha = 0.8) +
  facet_wrap(~analysis, ncol = 1, drop = T, scales = 'free_y', labeller = label_parsed) +
  labs(x = "Slope Estimate") +
  theme_bw() + xlim(-0.89, 0.81)  + 
  theme(
    axis.title.y=element_blank(),
    axis.text.y=element_blank(), axis.ticks.y=element_blank(), 
    panel.grid.major = element_blank(), 
    panel.grid.minor = element_blank(), 
    text = element_text(size=15)) 

b2 <- ggplot(data = droplevels(filter(BMLMglobal_traitsExtPos, !set %in% 'treeMCC',
                                      !analysis %in% 'paste(theta["T"], " ~ ", lambda, " + Traits")'))) +
  geom_vline(aes(xintercept = 0), linetype= 2) +
  geom_halfeyeh(aes(x = Estimate, y =  fct_rev(fct_inorder(Term))), 
                point_interval = median_qi, .width = .95, scale = 1, #size = 4, 
                #position = position_nudge(y = 0.1), 
                alpha = 0.8) +
  facet_wrap(~analysis, ncol = 1, drop = T, scales = 'free_y', labeller = label_parsed ) +
  labs(title = 'BMLM') +
  theme_bw() + xlim(-0.89, 0.81)  + 
  theme(axis.title=element_blank(), 
        #axis.title.y=element_blank(),
        axis.text=element_blank(), axis.ticks=element_blank(), 
        panel.grid.major = element_blank(), 
        panel.grid.minor = element_blank(), 
        text = element_text(size=15)) 

bb = ggarrange(b2, b1, nrow = 2, heights = c(1.7, 1), align = "v")

q5 <- ggarrange(p,bb,ncol = 2, widths = c(1.35,1), align = "hv")
q5

ggsave(paste0(fig_path, 'FigS7.pdf'), q5, width = 10, height = 8, useDingbats = FALSE )
ggsave(paste0(fig_path, 'FigS7.png'), q5, width = 10, height = 8)

Fig. S7. Results for the three models to test the effect of including life-history traits in the model with genetic diversity and speciation rate. The model with speciation rate as predictor suggest the association between genetic diversity and speciation rate is not due to traits affecting both variables. Analyses using the posterior dataset with PGLS estimates colored red when significant (p-value < 0.05) and BMLM results plotted with median and 95% confidence intervals from samples draw from all posterior dataset analyses.


Fig. S8. Genetic diversity with Mutation rate and Ne

## pi vs mutation rate & Ne
p3 = ggplot(mgendivSpRateMutRate, aes(y = EstPi, x = mean)) + geom_errorbar(aes(xmin = lower.ci,
    xmax = upper.ci), color = "gray70") + geom_point(size = 0.9, color = "gray50") +
    stat_smooth(method = lm, color = "black", se = F, fullrange = T) + scale_x_log10() +
    scale_y_log10(breaks = c(0.001, 0.01, 0.1), labels = c(0.001, 0.01, 0.1), limits = c(0.00017,
        0.15)) + facet_wrap(~fct_rev(variable), labeller = label_parsed, scales = "free_x",
    switch = "x") + theme_bw() + labs(y = expression(paste("Genetic diversity - ",
    theta["T"]))) + theme(axis.title.x = element_blank(), axis.title.y = element_text(size = 14),
    strip.text = element_text(size = 14), strip.placement = "outside", strip.background.x = element_blank(),
    panel.grid.major = element_blank(), panel.grid.minor = element_blank())

p3

ggsave(paste0(fig_path, "FigS8.pdf"), p3, width = 9, height = 5, useDingbats = FALSE)
ggsave(paste0(fig_path, "FigS8.png"), p3, width = 9, height = 5)

Fig. S8. Relationships between genetic diversity (\(\theta_{T}\)) and the theoretical components of \(\theta\): effective population size (\(N_e\)) and mutation rate (\(\mu\)). Plots with means (\(N_e\)) and (\(\mu\)) and 95% confidence intervals with a regression line and log scaled axis.


Fig. S9. MCC PGLS with per codon site genetic diversity

#### with at least 10 individuals to allow for more variants within alignments
#### and only for species with non null variation for all codon sites

wd <- paste0(folder_path, "pgls/global_perSite/output/")
dir.create(wd, recursive = T)

#### first detect species are out of frame and remove them from data set
library(bioseq)
fastas <- list.files(paste0(folder_path, "superCrunch_family/Output_alignments/"),
    pattern = "fasta", full.names = T)

checkFrame <- data.frame()
for (i in fastas) {
    dna <- read_fasta(i)

    if (length(dna) > 4) {
        species <- tools::file_path_sans_ext(basename(i))
        trans <- seq_translate(dna, code = 2, codon_frame = 1)
        checkFrame <- bind_rows(checkFrame, data.frame(species = species, check = ifelse(any(str_detect(trans,
            "\\*")), "notInFrame1", "inFrame1")))

        if (checkFrame[checkFrame$species %in% species, "check"] %in% "notInFrame1") {
            dna <- read_fasta(i)
            trans <- seq_translate(dna, code = 2, codon_frame = 2)
            checkFrame[checkFrame$species %in% species, "check"] <- ifelse(any(str_detect(trans,
                "\\*")), "notInFrame12", "inFrame2")
        }
        if (checkFrame[checkFrame$species %in% species, "check"] %in% "notInFrame12") {
            trans <- seq_translate(dna, code = 2, codon_frame = 3)
            checkFrame[checkFrame$species %in% species, "check"] <- ifelse(any(str_detect(trans,
                "\\*")), "notInFrame123", "inFrame3")
        }
    }
}

write.table(checkFrame, row.names = FALSE, quote = FALSE, sep = "\t", paste0(folder_path,
    "genetic_diversity/speciesFrame.txt"))

# discard species without genetic diversity, species which either mean
# subsampled pi or theta are outside the range of 1000 subsamples to 5
# individuals, and species not in frame1 (only about 50 some species are not in
# frame)

gen.div <- read.delim(paste0(folder_path, "genetic_diversity/GenDiv_subsample5Ind.txt")) %>%
    filter(EstPi > 0, !outPi %in% "out", !outTheta %in% "out", species %in% checkFrame[checkFrame$check %in%
        "inFrame1", "species"])

clades <- read.delim(paste0(folder_path, "speciation_rate/input/MamPhy_5911sp_tipGenFamOrdCladeGenesSampPC.txt")) %>%
    drop_na(PC) %>%
    mutate(species = word(tiplabel, 1, 2, sep = "_"), clades = sub("^PC\\d+_", "",
        PC)) %>%
    dplyr::select(species, clades)

spRate <- read.delim(paste0(folder_path, "speciation_rate/output/MCCposterior100_tipRate.txt")) %>%
    rename(set = treeN) %>%
    left_join(., clades, by = "species")

GendivSpRate10 <- spRate %>%
    arrange(clades) %>%
    inner_join(., select(gen.div, species, Nind, EstPi, EstPi_1:EstPi_3), by = "species") %>%
    filter(Nind > 10, EstPi > 0, EstPi_1 > 0, EstPi_2 > 0, EstPi_3 > 0) %>%
    droplevels()

saveRDS(GendivSpRate10, paste0(folder_path, "genetic_diversity/GendivSpRate10.rds"))

repiTree <- unique(GendivSpRate10$set)[repi]

DataSubset <- GendivSpRate10 %>%
    filter(set %in% repiTree) %>%
    drop_na()

## load phylogenies and speciation rates from 100 posterior trees + MCC tree
load(paste0(folder_path, "speciation_rate/output/upham_4064sp_FR_MCCposterior100.rdata"))  #loads TreeSet
treeMCC <- TreeSet[[repiTree]]  ### to run per tree in the cluster

TreeSubset <- drop.tip(treeMCC, as.character(treeMCC$tip.label[which(!treeMCC$tip.label %in%
    DataSubset$species)]))
TreeSubset

datacomp <- comparative.data(data = DataSubset, phy = TreeSubset, names.col = "species",
    vcv = TRUE, na.omit = TRUE, warn.dropped = TRUE)

tryCatch({
    modEstPi <- NULL
    modEstPi <- pgls(log(EstPi) ~ log(tipRate), data = datacomp, lambda = "ML")

    modEstPi1 <- NULL
    modEstPi1 <- pgls(log(EstPi_1) ~ log(tipRate), data = datacomp, lambda = "ML")

    modEstPi2 <- NULL
    modEstPi2 <- pgls(log(EstPi_2) ~ log(tipRate), data = datacomp, lambda = "ML")

    modEstPi3 <- NULL
    modEstPi3 <- pgls(log(EstPi_3) ~ log(tipRate), data = datacomp, lambda = "ML")

}, error = function(e) {
    cat("Error in", conditionMessage(e), "\n")
})

pgls <- list(EstPi = modEstPi, EstPi1 = modEstPi1, EstPi2 = modEstPi2, EstPi3 = modEstPi3)

saveRDS(pgls, paste0(wd, "global_gendivSpRatePerSite_results_", repiTree, ".rds"))
## load gendiv object that was filtered out of species with cytb out of frame 1
GendivSpRate10 <- readRDS(paste0(folder_path, "genetic_diversity/GendivSpRate10.rds"))

mGendivSpRateSite <- GendivSpRate10 %>%
    filter(set %in% "treeMCC") %>%
    pivot_longer(cols = EstPi:EstPi_3) %>%
    mutate(bp = "Boxplot", corr = "OLS")

mGendivSpRateSite$name <- factor(mGendivSpRateSite$name, labels = c(expression(paste("Total ",
    theta["T"])), expression(paste("First position ", theta["T"])), expression(paste("Second position ",
    theta["T"])), expression(paste("Third position ", theta["T"]))))


pglssum101 <- readRDS(paste0(folder_path, "pgls/global_perSite/global_gendivSpRatePerSite_PGLSresults.rds")) %>%
    filter(term %in% "log(tipRate)") %>%
    remove_rownames()

pgls100 <- pglssum101 %>%
    filter(!set %in% "treeMCC") %>%
    mutate(pvalueBin = ifelse(Pr...t.. < 0.01, "Signif.", "Not signif."), pgls = "PGLS",
        name = analysis) %>%
    select(set, pgls, name, Estimate, pvalueBin)

pgls100$name <- factor(pgls100$name, labels = c(expression(paste("Total ", theta["T"])),
    expression(paste("First position ", theta["T"])), expression(paste("Second position ",
        theta["T"])), expression(paste("Third position ", theta["T"]))))

meanTreeEstimate <- pgls100 %>%
    group_by(name) %>%
    dplyr::summarize(mean(Estimate))

pglssumMCC <- pglssum101 %>%
    filter(set %in% "treeMCC") %>%
    mutate(name = unique(mGendivSpRateSite$name), pgls = "PGLS", estimate = paste("MCC Estimate =",
        round(Estimate, 4)), pvalue = paste("MCC p-value =", formatC(Pr...t.., format = "E",
        digits = 3)), label = paste(estimate, pvalue, collapse = "\n")) %>%
    left_join(meanTreeEstimate)

ss <- ggplot(mGendivSpRateSite, aes(y = value)) + geom_boxplot() + scale_y_log10() +
    theme_bw() + labs(y = "Genetic diversity", x = " ") + facet_grid(bp ~ name, labeller = label_parsed) +
    theme(axis.text.x = element_blank(), axis.ticks.x = element_blank(), strip.text = element_text(size = 12),
        panel.spacing.x = unit(0, "lines"), panel.grid.major = element_blank(), panel.grid.minor = element_blank())

ps <- ggplot(mGendivSpRateSite, aes(x = tipRate, y = value)) + geom_point(color = "gray50") +
    geom_smooth(method = "lm", se = F, color = "black") + facet_grid(corr ~ name,
    labeller = label_parsed) + scale_y_log10() + scale_x_log10(breaks = c(0.05, 0.1,
    0.5, 1)) + theme_bw() + labs(y = "Genetic diversity", x = "Speciation rate (MCC tree)") +
    theme(strip.text.x = element_blank(), panel.spacing.x = unit(0, "lines"), strip.text.y = element_text(size = 12),
        panel.grid.major = element_blank(), panel.grid.minor = element_blank())


es <- ggplot() + geom_density(data = pgls100, aes(x = Estimate), trim = TRUE) + geom_rug(data = pgls100,
    aes(x = Estimate)) + geom_text(data = pglssumMCC, aes(x = `mean(Estimate)`, y = 1.5,
    label = estimate), size = 3, vjust = 0) + geom_text(data = pglssumMCC, aes(x = `mean(Estimate)`,
    y = 0.4, label = pvalue), size = 3, vjust = 0) + facet_grid(pgls ~ name, labeller = label_parsed) +
    theme_bw() + labs(y = " ", x = "PGLS estimates across 100 posterior trees") +
    theme(strip.text.x = element_blank(), strip.text.y = element_text(size = 12),
        panel.spacing.x = unit(0, "lines"), panel.grid.major = element_blank(), panel.grid.minor = element_blank()) +
    xlim(-0.48, -0.19)

q8 = ggarrange(ss, NULL, ps, NULL, es, ncol = 1, heights = c(1, -0.15, 1, -0.03,
    1), align = "hv", labels = c("A", "", "B", "", "C"))
q8

ggsave(paste0(fig_path, "FigS9.pdf"), q8, width = 12, height = 8, useDingbats = FALSE)
ggsave(paste0(fig_path, "FigS9.png"), q8, width = 12, height = 8)

Fig. S9. Genetic diversity (\(\theta_{T}\)) estimated per codon site position (A), linear regression with speciation rates (B) and PGLS estimates across 100 posterior trees (all p-values < 0.01) as well as MCC estimate and respective p-value. Data set with at least 10 individuals (1095 species) to allow for more polymorphisms across all codon sites.


Extras

Original vs subsampled measures

pirangeEst <- gen.div0 %>%
    select(species, EstPi, subPi_mean, subPi_min, subPi_max, outPi, subPi_lower.CI,
        subPi_upper.CI) %>%
    arrange(subPi_mean) %>%
    pivot_longer(cols = EstPi:subPi_mean) %>%
    ggplot() + geom_point(aes(y = fct_inorder(species), x = value, alpha = fct_infreq(outPi),
    color = fct_infreq(outPi), shape = name, size = fct_infreq(outPi))) + theme_bw() +
    scale_x_log10() + scale_color_manual(values = c("gray20", "#71D692", "#805C31"),
    labels = c("Included", "With 5 ind.", "Excluded"), name = expression(paste("Included species from ",
        theta["T"]))) + scale_alpha_manual(values = c(0.1, 0.4, 1), labels = c("Included",
    "With 5 ind.", "Excluded"), name = expression(paste("Included species from ",
    theta["T"]))) + scale_size_manual(values = c(0.2, 0.4, 1.5), labels = c("Included",
    "With 5 ind.", "Excluded"), name = expression(paste("Included species from ",
    theta["T"]))) + scale_shape_manual(values = c(1, 2), labels = c(expression(paste("Original ",
    theta["T"])), expression(paste("Mean subsampled ", theta["T"]))), name = "") +
    theme(legend.title = element_text(color = col_v1[95]), axis.title.y = element_blank(),
        axis.text.y = element_blank(), axis.ticks.y = element_blank()) + guides(color = guide_legend(order = 1),
    size = guide_legend(order = 1), alpha = guide_legend(order = 1), shape = guide_legend(order = 2)) +
    labs(x = "")

thetarangeEst <- gen.div0 %>%
    select(species, EstTheta, subTheta_mean, subTheta_min, subTheta_max, outTheta,
        subTheta_lower.CI, subTheta_upper.CI) %>%
    arrange(subTheta_mean) %>%
    pivot_longer(cols = EstTheta:subTheta_mean) %>%
    ggplot() + geom_point(aes(y = fct_inorder(species), x = value, alpha = fct_infreq(outTheta),
    color = fct_infreq(outTheta), shape = name, size = fct_infreq(outTheta))) + theme_bw() +
    scale_x_log10() + scale_color_manual(values = c("gray20", "#71D692", "#805C31"),
    labels = c("Included", "With 5 ind.", "Excluded"), name = expression(paste("Included species from ",
        theta["W"]))) + scale_alpha_manual(values = c(0.1, 0.4, 1), labels = c("Included",
    "With 5 ind.", "Excluded"), name = expression(paste("Included species from ",
    theta["W"]))) + scale_size_manual(values = c(0.2, 0.4, 1.5), labels = c("Included",
    "With 5 ind.", "Excluded"), name = expression(paste("Included species from ",
    theta["W"]))) + scale_shape_manual(values = c(1, 2), labels = c(expression(paste("Original ",
    theta["W"])), expression(paste("Mean subsampled ", theta["W"]))), name = "") +
    theme(legend.title = element_text(color = col_v2[90]), axis.title.y = element_blank(),
        axis.text.y = element_blank(), axis.ticks.y = element_blank()) + guides(color = guide_legend(order = 1),
    size = guide_legend(order = 1), alpha = guide_legend(order = 1), shape = guide_legend(order = 2)) +
    labs(x = "Genetic diversity (logged)")

ggarrange(pirangeEst, thetarangeEst, ncol = 1, labels = "auto")

Genetic diversity is plotted for all species ordered by mean subsampled, showing how many species were excluded because the original estimates done with all samples was outside the range of subsampling genetic diversity 1000 times to 5 individuals.

excP <- gen.div0 %>%
    filter(outPi %in% "out") %>%
    select(species, EstPi, subPi_mean, subPi_min, subPi_max) %>%
    pivot_longer(cols = EstPi:subPi_mean) %>%
    filter(value > 0) %>%
    ggplot() + geom_pointrange(aes(y = species, x = value, xmin = subPi_min, xmax = subPi_max,
    shape = name), color = col_v1[95]) + theme_bw() + scale_x_log10() + scale_shape_manual(values = c(17,
    19), labels = c("Original", "Mean subsampled"), name = "") + labs(x = "", y = expression(paste("Excluded species - ",
    theta["T"]))) + theme(legend.position = "top") + guides(shape = guide_legend(override.aes = list(color = "black")))

excT <- gen.div0 %>%
    filter(outTheta %in% "out") %>%
    select(species, EstTheta, subTheta_mean, subTheta_min, subTheta_max) %>%
    pivot_longer(cols = EstTheta:subTheta_mean) %>%
    filter(value > 0) %>%
    ggplot() + geom_pointrange(aes(y = fct_rev(species), x = value, xmin = subTheta_min,
    xmax = subTheta_max, shape = name), color = col_v2[90], show.legend = F) + theme_bw() +
    scale_x_log10() + scale_shape_manual(values = c(17, 19), labels = c("Original",
    "Mean subsampled"), name = "") + labs(x = "Genetic diversity (logged)", y = expression(paste("Excluded species - ",
    theta["W"])))

ggarrange(excP, excT, ncol = 1, heights = c(0.18, 1))

Plotting which species were excluded with respective original and mean subsampled given each species min and max range of subsampled values of genetic diversity.



ClaDS hyper-parameters

mparClaDS <- parClaDS %>%
    mutate(m = alpha * exp((sigma^2)/2)) %>%
    relocate(m, .before = epsilon) %>%
    pivot_longer(., cols = sigma:epsilon)

hp <- ggplot() + geom_violin(data = filter(mparClaDS, !set %in% "treeMCC"), aes(x = name,
    y = value)) + geom_point(data = filter(mparClaDS, !set %in% "treeMCC"), aes(x = name,
    y = value), size = 0.5, position = "jitter", col = "gray60") + stat_summary(data = filter(mparClaDS,
    !set %in% "treeMCC"), aes(x = name, y = value), fun = "mean", geom = "point",
    size = 2) + geom_point(data = filter(mparClaDS, set %in% "treeMCC"), aes(x = name,
    y = value), color = "red", size = 3, shape = 17) + facet_wrap(~name, scale = "free",
    ncol = 4, labeller = label_parsed) + theme_bw() + theme(axis.title = element_blank(),
    axis.ticks.x = element_blank(), axis.text.x = element_blank(), panel.grid = element_blank())

hp

ggsave(paste0(fig_path, "FigS2.pdf"), hp, width = 5, height = 3, useDingbats = FALSE)

Fig. SXX. ClaDS hyper-parameters \(\alpha\), \(\epsilon\), \(\sigma\) and m = \(\alpha\) × exp(\(\sigma^{2}\) /2) from 100 posterior trees (mean as black circles) and MCC tree (red triangles).


ClaDS vs DR vs BAMM

DR <- read.delim(paste0(folder_path, "/speciation_rate/upham_tipRates_DR_BAMM/DR-SUMMARY_MamPhy_BDvr_Completed_5911sp_topoCons_NDexp_all10k_v2_expanded.txt"),
    header = T, sep = " ") %>%
    rownames_to_column(var = "species") %>%
    mutate(species = word(species, 1, 2, sep = "_")) %>%
    select(species, means) %>%
    rename(DRrate_mean = means)

BAMM <- read.delim(paste0(folder_path, "/speciation_rate/upham_tipRates_DR_BAMM/globalMeanTipRates_sample10_tree1-10_NDexp_wNetDiv.txt"),
    header = T, sep = " ") %>%
    rownames_to_column(var = "species") %>%
    mutate(species = word(species, 1, 2, sep = "_")) %>%
    select(species, arithMeans_Lam)

ratesC1 <- spRate %>%
    filter(!set %in% "treeMCC") %>%
    group_by(species) %>%
    dplyr::summarise(ClaDSrate_mean = mean(tipRate, na.rm = TRUE), clades = unique(clades)) %>%
    left_join(., DR, by = "species") %>%
    left_join(., BAMM, by = "species") %>%
    pivot_longer(cols = DRrate_mean:arithMeans_Lam) %>%
    ggplot(aes(x = value, y = ClaDSrate_mean, color = name)) + geom_point(alpha = 0.3,
    size = 0.9) + geom_smooth(method = "lm") + theme_bw() + labs(y = "Mean tip speciation rates (100 trees)",
    x = "Upham rates", color = "") + scale_x_log10() + scale_y_log10()

ratesC2 <- spRate %>%
    filter(set %in% "treeMCC") %>%
    left_join(., DR, by = "species") %>%
    left_join(., BAMM, by = "species") %>%
    pivot_longer(cols = DRrate_mean:arithMeans_Lam) %>%
    ggplot(aes(x = value, y = tipRate, color = name)) + geom_point(alpha = 0.3, size = 0.9) +
    geom_smooth(method = "lm") + theme_bw() + labs(y = "MCC tip speciation rates",
    x = "Upham rates", color = "") + scale_x_log10() + scale_y_log10()

ggarrange(ratesC1, ratesC2, ncol = 2, common.legend = T)

gendivSpRate3 <- gendivSpRate %>%
    left_join(., DR, by = "species") %>%
    left_join(., BAMM, by = "species") %>%
    filter(set %in% "treeMCC") %>%
    select(species, EstPi, DRrate_mean, arithMeans_Lam, tipRate) %>%
    drop_na()

# TreeSubset <- drop.tip(treeMCC,
# as.character(treeMCC$tip.label[which(!treeMCC$tip.label %in%
# gendivSpRate3$species)])) datacomp <- comparative.data(data = gendivSpRate3,
# phy = TreeSubset, names.col = 'species', vcv = TRUE, na.omit = TRUE,
# warn.dropped = TRUE) modDR <- pgls(log(EstPi) ~ log(DRrate_mean), data =
# datacomp, lambda = 'ML') saveRDS(modDR,
# paste0(folder_path,'pgls/extra/pgls_piDR.rds'))
modDR <- readRDS(paste0(folder_path, "pgls/extra/pgls_piDR.rds"))
summary(modDR)
## 
## Call:
## pgls(formula = log(EstPi) ~ log(DRrate_mean), data = datacomp, 
##     lambda = "ML")
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.38899 -0.06502  0.00120  0.06664  0.29202 
## 
## Branch length transformations:
## 
## kappa  [Fix]  : 1.000
## lambda [ ML]  : 0.522
##    lower bound : 0.000, p = < 2.22e-16
##    upper bound : 1.000, p = < 2.22e-16
##    95.0% CI   : (0.397, 0.632)
## delta  [Fix]  : 1.000
## 
## Coefficients:
##                   Estimate Std. Error t value  Pr(>|t|)    
## (Intercept)      -4.777975   0.589072 -8.1110 8.882e-16 ***
## log(DRrate_mean) -0.303231   0.054457 -5.5683 2.945e-08 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.1002 on 1874 degrees of freedom
## Multiple R-squared: 0.01628, Adjusted R-squared: 0.01575 
## F-statistic: 31.01 on 1 and 1874 DF,  p-value: 2.945e-08
summary(lm(log(tipRate) ~ log(DRrate_mean), data = gendivSpRate3))
## 
## Call:
## lm(formula = log(tipRate) ~ log(DRrate_mean), data = gendivSpRate3)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -0.8161 -0.2113 -0.0066  0.1898  0.9640 
## 
## Coefficients:
##                  Estimate Std. Error t value Pr(>|t|)    
## (Intercept)      -0.29982    0.01968  -15.23   <2e-16 ***
## log(DRrate_mean)  0.74552    0.01186   62.86   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.2876 on 1874 degrees of freedom
## Multiple R-squared:  0.6783, Adjusted R-squared:  0.6781 
## F-statistic:  3952 on 1 and 1874 DF,  p-value: < 2.2e-16
# modBAMM <- pgls(log(EstPi) ~ log(arithMeans_Lam), data = datacomp, lambda =
# 'ML') saveRDS(modBAMM, paste0(folder_path,'pgls/extra/pgls_piBAMM.rds'))
modBAMM <- readRDS(paste0(folder_path, "pgls/extra/pgls_piBAMM.rds"))
summary(modBAMM)
## 
## Call:
## pgls(formula = log(EstPi) ~ log(arithMeans_Lam), data = datacomp, 
##     lambda = "ML")
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.32230 -0.06630  0.00367  0.07095  0.35680 
## 
## Branch length transformations:
## 
## kappa  [Fix]  : 1.000
## lambda [ ML]  : 0.524
##    lower bound : 0.000, p = < 2.22e-16
##    upper bound : 1.000, p = < 2.22e-16
##    95.0% CI   : (0.401, 0.633)
## delta  [Fix]  : 1.000
## 
## Coefficients:
##                     Estimate Std. Error t value  Pr(>|t|)    
## (Intercept)         -5.35530    0.63434 -8.4423 < 2.2e-16 ***
## log(arithMeans_Lam) -0.73664    0.14576 -5.0537 4.754e-07 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.1006 on 1874 degrees of freedom
## Multiple R-squared: 0.01345, Adjusted R-squared: 0.01292 
## F-statistic: 25.54 on 1 and 1874 DF,  p-value: 4.754e-07

### try to get pgls of spRate ~ pi but doesn't work
MCCgendivSpRate <- gendivSpRate %>%
    filter(set %in% "treeMCC", !is.na(EstPi))

TreeSubset <- drop.tip(treeMCC, as.character(treeMCC$tip.label[which(!treeMCC$tip.label %in%
    MCCgendivSpRate$species)]))

datacomp <- comparative.data(data = MCCgendivSpRate, phy = TreeSubset, names.col = "species",
    vcv = TRUE, na.omit = TRUE, warn.dropped = TRUE)

modspRatePi <- pgls(log(tipRate) ~ log(EstPi), data = datacomp, lambda = "ML")
saveRDS(modspRatePi, paste0(folder_path, "pgls/extra/pgls_spRatePi.rds"))
spRatePi <- readRDS(paste0(folder_path, "pgls/extra/pgls_spRatePi.rds"))
summary(modspRatePi)
summary(lm(data = MCCgendivSpRate, log(tipRate) ~ log(EstPi)))  ## still suggest correlation even if not able to account for the phylogeny


dtset <- datacomp$data %>%
    rownames_to_column("species")

fit1 <- gls(log(tipRate) ~ log(EstPi), data = dtset, correlation = corPagel(1, phy = datacomp$phy,
    form = ~species), method = "ML")

summary(fit1)

Statistically comparing how different are the per site correlations

Using a multivariate analysis of variance (MANOVA):

load(paste0(folder_path, "speciation_rate/output/upham_4064sp_FR_MCCposterior100.rdata"))
GendivSpRate10 <- readRDS(paste0(folder_path, "genetic_diversity/GendivSpRate10.rds")) %>%
    filter(set %in% "treeMCC")

MCCtree <- TreeSet[["treeMCC"]]

TreeSubset <- drop.tip(MCCtree, as.character(MCCtree$tip.label[which(!MCCtree$tip.label %in%
    GendivSpRate10$species)]))

## install_github('JClavel/mvMORPH', ref='devel_1.1.5')
library(mvMORPH)

dtset <- cbind(GendivSpRate10$EstPi_1, GendivSpRate10$EstPi_2, GendivSpRate10$EstPi_3)
rownames(dtset) <- GendivSpRate10$species
dtset <- list(genDiv = log(dtset), spRate = log(GendivSpRate10$tipRate))

fit_mult <- mvgls(genDiv ~ spRate, data = dtset, tree = TreeSubset, model = "lambda",
    method = "LL")

fit_mult
## 
## Call:
## mvgls(formula = genDiv ~ spRate, data = dtset, tree = TreeSubset, 
##     model = "lambda", method = "LL")
## 
## 
## Generalized least squares fit by REML 
## Log-restricted-likelihood: -3652.189 
## 
## 
## Parameter estimate(s):
## lambda: 0.3435 
## 
## 
## Covariance matrix of size: 3 by 3 
## for 1095 observations 
## 
## Coefficients:
##              [,1]     [,2]     [,3]   
## (Intercept)  -5.4512  -6.6377  -3.9131
## spRate       -0.4286  -0.2445  -0.4060
manova.gls(fit_mult, nperm = 999)
## Sequential MANOVA Tests: Pillai test statistic 
##        Df test stat approx F num Df den Df   Pr(>F)    
## spRate  1   0.03493    13.16      3   1091 1.91e-08 ***
## --- 
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
manFit <- manova.gls(fit_mult, nperm = 999)  ### significant
effectsize(manFit)  # you can also have effect-size the equivalent of
## ## Multivariate measure(s) of association ## 
##               spRate
## ξ^2 [Pillai] 0.03493
# R2 for multivariate using the effectsize() function on the manova.gls object
# in the devel version of mvMORPH.

Test for differences between the responses using tests for “repeated measures”

Pi1 vs Pi2

# test for differences between the responses using tests for 'repeated
# measures' (just a way to compare div1,2... by considering them like if they
# were different measures of the same thing): P=matrix(c(0,1,-1,0) ##
# '1*diversity_1 - 1*diversity_2 + 0*diversity_3 = 0'

P1 = matrix(c(1, -1, 0), nrow = 3)  #'diversity_1' to  'diversity_2'
manova.gls(fit_mult, P = P1, L = c(0, 1), nperm = 999)
## General Linear Hypothesis Test (repeated measures design): Pillai test statistic 
##             Df test stat approx F num Df den Df   Pr(>F)   
## Contrasts L  1  0.006649    7.317      1   1093 0.006939 **
## --- 
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Pi2 vs Pi3

P2 = matrix(c(0, 1, -1), nrow = 3)  # 'diversity_2' to 'diversity_3'
manova.gls(fit_mult, P = P2, L = c(0, 1), nperm = 999)
## General Linear Hypothesis Test (repeated measures design): Pillai test statistic 
##             Df test stat approx F num Df den Df  Pr(>F)  
## Contrasts L  1   0.00448    4.919      1   1093 0.02677 *
## --- 
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Pi1 vs Pi3

P3 = matrix(c(1, 0, -1), nrow = 3)  # 'diversity_1' to 'diversity_3'
manova.gls(fit_mult, P = P3, L = c(0, 1), nperm = 999)
## General Linear Hypothesis Test (repeated measures design): Pillai test statistic 
##             Df test stat approx F num Df den Df Pr(>F) 
## Contrasts L  1 0.0002665   0.2913      1   1093 0.5895 
## --- 
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Second option for Fig. 7 Traits results from posterior trees dataset

### PGLS
pglsResulttraits2 <- pglsResulttraits %>%
    mutate(size = case_when(set %in% "treeMCC" ~ "MCC", TRUE ~ "Posterior")) %>%
    arrange(desc(size))

ppgls2 <- ggplot(pglsResulttraits2) + geom_point(aes(y = Estimate, x = Term, group = set,
    size = size, colour = Term, shape = fct_rev(pvalueBin)), alpha = 0.4, stroke = 0.2,
    position = position_dodge(width = 0.9)) + geom_hline(yintercept = 0, size = 0.2) +
    theme_bw() + facet_wrap(~analysis, nrow = 1, drop = T, scales = "free_x", labeller = label_parsed) +
    scale_shape_manual(name = "PGLS sig.", values = c(19, 1), labels = c("< 0.05",
        "> 0.05")) + scale_colour_manual(name = "Models terms", values = iwanthue(6),
    guide = guide_legend(override.aes = list(alpha = 1), nrow = 3)) + scale_size_manual(name = "Tree set",
    labels = c("MCC tree", "Posterior trees"), values = c(3.5, 1)) + theme(legend.position = "top",
    plot.margin = unit(c(0, 0, 0, 0), "cm"), legend.spacing.y = unit(0.1, "cm"),
    panel.grid.major = element_blank(), panel.grid.minor = element_blank(), axis.ticks.x = element_blank(),
    axis.text.x = element_blank()) + labs(y = "PGLS estimates", x = "") + guides(size = guide_legend(nrow = 3,
    byrow = TRUE), shape = guide_legend(nrow = 3, byrow = TRUE))

### BMLM
BMLMglobal_traitssum2 <- BMLMglobal_traitsExtPos %>%
    group_by(Term, analysis, set) %>%
    median_qi(Estimate)

pbmlm2 <- ggplot() + geom_pointinterval(data = filter(BMLMglobal_traitssum2, set %in%
    "treeMCC"), aes(y = Estimate, x = Term, ymin = .lower, ymax = .upper, group = set,
    color = Term), alpha = 0.7, point_alpha = 0.5, point_size = 3, size = 3, position = position_dodge(width = 0.9)) +
    geom_pointinterval(data = filter(BMLMglobal_traitssum2, !set %in% "treeMCC"),
        aes(y = Estimate, x = Term, ymin = .lower, ymax = .upper, group = set, color = Term),
        alpha = 0.15, point_alpha = 0.07, point_size = 1, size = 0.5, position = position_jitterdodge(jitter.width = 0.7,
            dodge.width = 0.9)) + facet_wrap(~analysis, nrow = 1, drop = T, scales = "free_x",
    labeller = label_parsed) + geom_hline(yintercept = 0, size = 0.2) + scale_color_manual(name = "Models terms",
    values = iwanthue(6)) + theme_bw() + theme(legend.position = "bottom", strip.background = element_blank(),
    strip.text.x = element_blank(), plot.margin = unit(c(-0.5, 0, 0, 0), "cm"), panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(), axis.ticks.x = element_blank(), axis.text.x = element_blank()) +
    # axis.text.x = element_text(angle = 30, hjust = 1 )) +
labs(y = "BMLM estimates", x = "")

qs42 = ggpubr::ggarrange(ppgls2, pbmlm2, ncol = 1, nrow = 2, common.legend = T, legend = "bottom",
    align = "v", heights = c(1, 0.9))
qs42

ggsave(paste0(fig_path, "FigS6_2.pdf"), qs42, width = 8, height = 6, useDingbats = FALSE)

Fig. S7. Results for the three models to test the effect of including life-history traits (body mass, species range area, mean annual temperature averaged across species range, litter size and generation length) in the model with genetic diversity and speciation rate. The model with speciation rate as predictor suggest the association between genetic diversity and speciation rate is not due to traits affecting both variables. Analyses done for set of 100 posterior trees and MCC tree, showing estimates significance for PGLS analyses (p-value < 0.05) and 95% confidence intervals around median estimates for BMLM analyses.


Correlations between variables

## mutation rate vs speciation rate
mgsubendivSpRateMutRate <- gendivSpRateMutRate %>%
    filter(!set %in% "treeMCC") %>%
    group_by(species) %>%
    dplyr::summarise(SpRate_mean = mean(tipRate, na.rm = TRUE), SpRate_lower.ci = CI(tipRate,
        ci = 0.95)[1], SpRate_upper.ci = CI(tipRate, ci = 0.95)[3], MutRate_mean = mean(mutRate,
        na.rm = TRUE), MutRate_lower.ci = CI(mutRate, ci = 0.95)[1], MutRate_upper.ci = CI(mutRate,
        ci = 0.95)[3], Ne_mean = mean(Ne, na.rm = TRUE), Ne_lower.ci = CI(Ne, ci = 0.95)[1],
        Ne_upper.ci = CI(Ne, ci = 0.95)[3]) %>%
    drop_na() %>%
    left_join(., clades, by = "species") %>%
    filter(clades %in% names(tcol[2:15])) %>%
    left_join(., gen.div[, c("species", "EstPi", "subPi_mean", "subPi_upper.CI",
        "subPi_lower.CI")], by = "species")
library(GGally)
mmdata <- mgsubendivSpRateMutRate %>%
    select(species, clades, SpRate_mean, MutRate_mean, Ne_mean, EstPi) %>%
    left_join(., traitData[-7], by = "species") %>%
    pivot_longer(cols = SpRate_mean:BodyMassKg_notInputed)

mdata <- pivot_wider(mmdata, names_from = name, values_from = value) %>%
    select(-mean_temp)

cors <- ggpairs(mdata, columns = 3:9, upper = list(continuous = wrap("smooth", alpha = 0.3,
    size = 0.1, se = F)), diag = "blank", lower = "blank", mapping = aes(color = clades),
    legend = 2) + scale_y_log10() + scale_x_log10() + scale_color_manual(values = tcol[2:15]) +
    theme_bw() + theme(legend.position = "bottom")
cors

p0 = ggplot(mgsubendivSpRateMutRate, aes(y = subPi_mean, x = SpRate_mean)) + geom_errorbarh(aes(color = clades,
    xmin = SpRate_lower.ci, xmax = SpRate_upper.ci)) + geom_errorbar(aes(color = clades,
    ymin = subPi_lower.CI, ymax = subPi_upper.CI)) + geom_point(aes(color = clades),
    size = 0.9) + geom_smooth(method = lm, position = "identity") + scale_x_log10(breaks = breaks_log(n = 8),
    labels = label_number()) + scale_y_log10(labels = label_scientific(), breaks = breaks_log(n = 6)) +
    theme_bw() + labs(x = expression(paste("Speciation rate - ", lambda)), y = expression(paste("Genetic diversity - ",
    theta["T"], " Subsampled to 5 individuals"))) + scale_colour_manual(values = tcol[-15]) +
    theme(legend.position = "none")

p1 = ggplot(mgsubendivSpRateMutRate, aes(y = subPi_mean, x = MutRate_mean)) + geom_errorbarh(aes(color = clades,
    xmin = MutRate_lower.ci, xmax = MutRate_upper.ci)) + geom_errorbar(aes(color = clades,
    ymin = subPi_lower.CI, ymax = subPi_upper.CI)) + geom_point(aes(color = clades),
    size = 0.9) + geom_smooth(method = lm, position = "identity") + scale_x_log10(breaks = breaks_log(n = 8),
    labels = label_scientific()) + scale_y_log10(labels = label_scientific(), breaks = breaks_log(n = 6)) +
    theme_bw() + labs(x = expression(paste("Mutation rate - ", mu)), y = expression(paste("Genetic diversity - ",
    theta["T"], " Subsampled to 5 individuals"))) + scale_colour_manual(values = tcol[-15]) +
    theme(legend.position = "none")

p2 = ggplot(mgsubendivSpRateMutRate, aes(y = subPi_mean, x = Ne_mean)) + geom_errorbarh(aes(color = clades,
    xmin = Ne_lower.ci, xmax = Ne_upper.ci)) + geom_errorbar(aes(color = clades,
    ymin = subPi_lower.CI, ymax = subPi_upper.CI)) + geom_point(aes(color = clades),
    size = 0.9) + geom_smooth(method = lm, position = "identity") + scale_x_log10(breaks = breaks_log(n = 8),
    labels = label_scientific()) + scale_y_log10(labels = label_scientific(), breaks = breaks_log(n = 6)) +
    theme_bw() + labs(x = expression(paste(N["e"])), y = expression(paste("Genetic diversity - ",
    theta["T"], " Subsampled to 5 individuals"))) + scale_colour_manual(values = tcol[-15]) +
    theme(legend.position = "none")

q <- ggarrange(p0, p1, p2, ncol = 3)

mmdata <- mgsubendivSpRateMutRate %>%
    select(species, clades, SpRate_mean, MutRate_mean, Ne_mean, EstPi) %>%
    pivot_longer(cols = SpRate_mean:EstPi)

q2 <- ggplot(mmdata, aes(y = value, color = clades, x = clades)) + geom_boxplot(show.legend = FALSE) +
    facet_wrap(~name, nrow = 4, ncol = 1, scales = "free_y", strip.position = "right") +
    scale_y_log10() + scale_color_manual(values = tcol[-15]) + theme_bw() + theme(legend.position = "none")

pq <- ggarrange(q, q2, nrow = 2, heights = c(0.5, 1))
pq

mmdata <- mgsubendivSpRateMutRate %>%
    select(species, clades, SpRate_mean, MutRate_mean, Ne_mean, EstPi) %>%
    left_join(., traitData[-7], by = "species") %>%
    pivot_longer(cols = SpRate_mean:BodyMassKg_notInputed)

s1 <- ggplot(filter(mmdata, name %in% "EstPi"), aes(y = value, color = clades)) +
    geom_boxplot(show.legend = FALSE) + facet_wrap(~clades, ncol = 1, strip.position = "right") +
    scale_y_log10() + scale_color_manual(values = tcol[-15]) + theme_bw() + labs(y = "",
    x = expression(paste(theta["T"]))) + theme(axis.text.x = element_blank(), axis.ticks.x = element_blank(),
    strip.background = element_blank(), strip.text.y = element_blank())

s2 <- ggplot(filter(mmdata, name %in% "MutRate_mean"), aes(y = value, color = clades)) +
    geom_boxplot(show.legend = FALSE) + facet_wrap(~clades, ncol = 1, strip.position = "right") +
    scale_y_log10() + scale_color_manual(values = tcol[-15]) + theme_bw() + labs(y = "",
    x = expression(mu)) + theme(axis.text.x = element_blank(), axis.ticks.x = element_blank(),
    strip.background = element_blank(), strip.text.y = element_blank())

s3 <- ggplot(filter(mmdata, name %in% "Ne_mean"), aes(y = value, color = clades)) +
    geom_boxplot(show.legend = FALSE) + facet_wrap(~clades, ncol = 1, strip.position = "right") +
    scale_y_log10() + scale_color_manual(values = tcol[-15]) + theme_bw() + labs(y = "",
    x = expression(paste(N["e"]))) + theme(axis.text.x = element_blank(), axis.ticks.x = element_blank(),
    strip.background = element_blank(), strip.text.y = element_blank())

s4 <- ggplot(filter(mmdata, name %in% "SpRate_mean"), aes(y = value, color = clades)) +
    geom_boxplot(show.legend = FALSE) + facet_wrap(~clades, ncol = 1, strip.position = "right") +
    scale_y_log10() + scale_color_manual(values = tcol[-15]) + theme_bw() + labs(y = "",
    x = expression(lambda)) + theme(axis.text.x = element_blank(), axis.ticks.x = element_blank())
# strip.background = element_blank(), strip.text.y = element_blank() )

s5 <- ggplot(filter(mmdata, name %in% "BodyMassKg_notInputed"), aes(y = value, color = clades)) +
    geom_boxplot(show.legend = FALSE) + facet_wrap(~clades, ncol = 1, strip.position = "right") +
    scale_y_log10() + scale_color_manual(values = tcol[-15]) + theme_bw() + labs(y = "",
    x = "Body mass (kg)") + theme(axis.text.x = element_blank(), axis.ticks.x = element_blank(),
    strip.background = element_blank(), strip.text.y = element_blank())

s6 <- ggplot(filter(mmdata, name %in% "geoArea_km2"), aes(y = value, color = clades)) +
    geom_boxplot(show.legend = FALSE) + facet_wrap(~clades, ncol = 1, strip.position = "right") +
    scale_y_log10() + scale_color_manual(values = tcol[-15]) + theme_bw() + labs(y = "",
    x = "Range area (km2)") + theme(axis.text.x = element_blank(), axis.ticks.x = element_blank(),
    strip.background = element_blank(), strip.text.y = element_blank())

s7 <- ggplot(filter(mmdata, name %in% "GenerationLength_d"), aes(y = value, color = clades)) +
    geom_boxplot(show.legend = FALSE) + facet_wrap(~clades, ncol = 1, strip.position = "right") +
    scale_y_log10() + scale_color_manual(values = tcol[-15]) + theme_bw() + labs(y = "",
    x = "Generation length (d)") + theme(axis.text.x = element_blank(), axis.ticks.x = element_blank(),
    strip.background = element_blank(), strip.text.y = element_blank())

ggarrange(s1, s2, s3, s7, s5, s6, s4, ncol = 7)


Life-history specific tests

Traits plotting

usedTraitDataset <-  gendivSpRateTrait %>% 
  filter(set %in% 'treeMCC') %>% 
  select(-DispDist_notInputed) %>%
  drop_na()

names <- c(BodyMassKg_notInputed = "Body mass",
           geoArea_km2 = "Range area",
           mean_temp = "Mean range temperature",
           litter_or_clutch_size_n = "Litter size",
           GenerationLength_d = "Generation length")

divTraits <- usedTraitDataset %>% 
  select(species, EstPi, BodyMassKg_notInputed, geoArea_km2,  mean_temp, litter_or_clutch_size_n, GenerationLength_d) %>%
  pivot_longer(cols=BodyMassKg_notInputed:GenerationLength_d) %>%
  ggplot(aes(y = EstPi, x = value)) +
  geom_point(size = 0.5, alpha = 0.4) + 
  geom_smooth(method='lm', se = F) + 
  facet_wrap(~name, scale = "free", ncol = 1, 
             labeller = labeller(name = names),
             strip.position = "bottom") + 
  labs(x="", y = expression(paste("Genetic diversity " (theta['T']))),
       title = "Genetic diversity") +
  scale_y_log10() + 
  scale_x_log10() +
  theme_bw() + 
  theme(#aspect.ratio = 1,
    strip.background = element_blank(),
    strip.placement = "outside")

spRateTraits <- usedTraitDataset %>% 
  select(species, tipRate, BodyMassKg_notInputed, geoArea_km2,  mean_temp, litter_or_clutch_size_n, GenerationLength_d) %>%
  pivot_longer(cols=BodyMassKg_notInputed:GenerationLength_d) %>%
  ggplot(aes(y = tipRate, x = value)) +
  geom_point(size = 0.5, alpha = 0.4) + 
  geom_smooth(method='lm', se = F) + 
  facet_wrap(~name, scale = "free", ncol = 1, 
             labeller = labeller(name = names),
             strip.position = "bottom") + 
  labs(x="", y = "MCC tree tip speciation rates", 
       title = "Speciation rates") +
  scale_y_log10() + 
  scale_x_log10() +
  theme_bw() + 
  theme(#aspect.ratio = 1,
    strip.background = element_blank(),
    strip.placement = "outside")

Traits <- usedTraitDataset %>% 
  select(species, BodyMassKg_notInputed, geoArea_km2,  mean_temp, litter_or_clutch_size_n, GenerationLength_d) %>%
  pivot_longer(cols=BodyMassKg_notInputed:GenerationLength_d) %>%
  ggplot(aes(x = value)) +
  geom_histogram() +
  facet_wrap(~name, scale = "free", ncol = 1, 
             labeller = labeller(name = names),
             strip.position = "bottom") + 
  labs(x = "", y = "Counts") +
  scale_x_log10() +
  theme_bw() + 
  theme(panel.grid = element_blank(),
        #aspect.ratio = 1,
        strip.background = element_blank(),
        strip.placement = "outside")

tds <- ggarrange(Traits, divTraits, spRateTraits,  ncol = 3, align = 'hv')
tds

ggsave(paste0(fig_path, 'other/FigSXX_traitsPlotting.pdf'), tds, width = 7, height = 10, useDingbats = FALSE )

Fig. SXX. Dataset used in analyses with life-history traits (1004 spp.). Histogram for each trait with x-axis log scaled (A). Linear relationship between genetic diversity (B) and tip speciation rates (from MCC tree; C) with each used trait, both axis are log scaled.


Check for multicollinearity

# http://www.mpcm-evolution.com/practice/online-practical-material-chapter-6/chapter-6-1exercises-testing-assumptions-statistical-issues-framework-phylogenetic-generalized-least-squares
library(car)

### prepare data to be the same as used for PGLS analyses
load(paste0(folder_path, "speciation_rate/output/upham_4064sp_FR_MCCposterior100.rdata"))  #loads TreeSet

DataSubsetMCC <- gendivSpRateTrait %>%
    filter(set %in% "treeMCC") %>%
    select(species, clades, tipRate, EstPi, BodyMassKg_notInputed, geoArea_km2, mean_temp,
        litter_or_clutch_size_n, GenerationLength_d) %>%
    drop_na()

treeMCC <- TreeSet[["treeMCC"]]

TreeSubset <- drop.tip(treeMCC, as.character(treeMCC$tip.label[which(!treeMCC$tip.label %in%
    DataSubsetMCC$species)]))

datacomp <- comparative.data(data = DataSubsetMCC, phy = TreeSubset, names.col = "species",
    vcv = TRUE, na.omit = TRUE, warn.dropped = TRUE)

lm.res = lm(log(EstPi) ~ log(tipRate) + log(BodyMassKg_notInputed) + log(geoArea_km2) +
    log(mean_temp) + log(litter_or_clutch_size_n) + log(GenerationLength_d), data = DataSubsetMCC)


vif(mod = lm.res)
##                 log(tipRate)   log(BodyMassKg_notInputed) 
##                     1.063279                     1.704704 
##             log(geoArea_km2)               log(mean_temp) 
##                     1.053977                     1.164662 
## log(litter_or_clutch_size_n)      log(GenerationLength_d) 
##                     2.102306                     3.063934

Using variance inflation factors we can verify if there is a big problem with multicollinearity for the linear model (not sure if there is a way to verify this for a PGLS). Since all values are below 10, which normally values above is commonly considered indicative of a problem, it seems unlikely the collinearity between these variables would be a problem.


Body mass

par(mfrow = c(2, 2))
summary(lm(log(EstPi) ~ log(BodyMassKg_notInputed), data = datacomp$data))
## 
## Call:
## lm(formula = log(EstPi) ~ log(BodyMassKg_notInputed), data = datacomp$data)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -4.1749 -0.6113  0.1553  0.6946  1.9319 
## 
## Coefficients:
##                            Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                -4.10449    0.03443 -119.22  < 2e-16 ***
## log(BodyMassKg_notInputed) -0.08152    0.01041   -7.83 1.24e-14 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.9647 on 1002 degrees of freedom
## Multiple R-squared:  0.05766,    Adjusted R-squared:  0.05672 
## F-statistic: 61.31 on 1 and 1002 DF,  p-value: 1.238e-14
# modpiBM <- pgls(log(EstPi) ~ log(BodyMassKg_notInputed), data = datacomp,
# lambda = 'ML') saveRDS(modpiBM,
# paste0(folder_path,'pgls/extra/pglspiBM.rds'))
modpiBM <- readRDS(paste0(folder_path, "pgls/extra/pglspiBM.rds"))
summary(modpiBM)
## 
## Call:
## pgls(formula = log(EstPi) ~ log(BodyMassKg_notInputed), data = datacomp, 
##     lambda = "ML")
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.30828 -0.06564 -0.00734  0.05576  0.31807 
## 
## Branch length transformations:
## 
## kappa  [Fix]  : 1.000
## lambda [ ML]  : 0.568
##    lower bound : 0.000, p = < 2.22e-16
##    upper bound : 1.000, p = < 2.22e-16
##    95.0% CI   : (0.410, 0.696)
## delta  [Fix]  : 1.000
## 
## Coefficients:
##                            Estimate Std. Error t value  Pr(>|t|)    
## (Intercept)                -4.01495    0.56474 -7.1094 2.212e-12 ***
## log(BodyMassKg_notInputed) -0.12415    0.02292 -5.4168 7.598e-08 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.09526 on 1002 degrees of freedom
## Multiple R-squared: 0.02845, Adjusted R-squared: 0.02748 
## F-statistic: 29.34 on 1 and 1002 DF,  p-value: 7.598e-08
plot(modpiBM)

summary(lm(log(tipRate) ~ log(BodyMassKg_notInputed), data = datacomp$data))
## 
## Call:
## lm(formula = log(tipRate) ~ log(BodyMassKg_notInputed), data = datacomp$data)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -1.89281 -0.27891 -0.01674  0.29959  1.58359 
## 
## Coefficients:
##                             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                -1.468066   0.017924 -81.904   <2e-16 ***
## log(BodyMassKg_notInputed)  0.011841   0.005421   2.184   0.0292 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.5022 on 1002 degrees of freedom
## Multiple R-squared:  0.004739,   Adjusted R-squared:  0.003746 
## F-statistic: 4.771 on 1 and 1002 DF,  p-value: 0.02917
# modspRateBM <- pgls(log(tipRate) ~ log(BodyMassKg_notInputed), data =
# datacomp, lambda = 'ML') saveRDS(modspRateBM,
# paste0(folder_path,'pgls/extra/pglsspRateBM.rds'))
modspRateBM <- readRDS(paste0(folder_path, "pgls/extra/pglsspRateBM.rds"))
summary(modspRateBM)
## 
## Call:
## pgls(formula = log(tipRate) ~ log(BodyMassKg_notInputed), data = datacomp, 
##     lambda = "ML")
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.27134 -0.04183 -0.00030  0.03999  0.25566 
## 
## Branch length transformations:
## 
## kappa  [Fix]  : 1.000
## lambda [ ML]  : 0.993
##    lower bound : 0.000, p = < 2.22e-16
##    upper bound : 1.000, p = < 2.22e-16
##    95.0% CI   : (0.991, 0.996)
## delta  [Fix]  : 1.000
## 
## Coefficients:
##                              Estimate Std. Error t value  Pr(>|t|)    
## (Intercept)                -2.5229203  0.4957941 -5.0886 4.305e-07 ***
## log(BodyMassKg_notInputed)  0.0041917  0.0088750  0.4723    0.6368    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.06804 on 1002 degrees of freedom
## Multiple R-squared: 0.0002226,   Adjusted R-squared: -0.0007752 
## F-statistic: 0.2231 on 1 and 1002 DF,  p-value: 0.6368
plot(modspRateBM)

summary(lm(log(EstPi) ~ log(tipRate) + log(BodyMassKg_notInputed), data = datacomp$data))
## 
## Call:
## lm(formula = log(EstPi) ~ log(tipRate) + log(BodyMassKg_notInputed), 
##     data = datacomp$data)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -3.7804 -0.5955  0.1438  0.6774  2.0556 
## 
## Coefficients:
##                            Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                -4.63779    0.09383 -49.429  < 2e-16 ***
## log(tipRate)               -0.36327    0.05962  -6.094 1.57e-09 ***
## log(BodyMassKg_notInputed) -0.07722    0.01025  -7.531 1.12e-13 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.9478 on 1001 degrees of freedom
## Multiple R-squared:  0.09136,    Adjusted R-squared:  0.08955 
## F-statistic: 50.32 on 2 and 1001 DF,  p-value: < 2.2e-16
# modpirateBM <- pgls(log(EstPi) ~ log(tipRate) + log(BodyMassKg_notInputed),
# data = datacomp, lambda = 'ML') saveRDS(modpirateBM,
# paste0(folder_path,'pgls/extra/pglspirateBM.rds'))
modpirateBM <- readRDS(paste0(folder_path, "pgls/extra/pglspirateBM.rds"))
summary(modpirateBM)
## 
## Call:
## pgls(formula = log(EstPi) ~ log(tipRate) + log(BodyMassKg_notInputed), 
##     data = datacomp, lambda = "ML")
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.25237 -0.05717  0.00122  0.06204  0.29693 
## 
## Branch length transformations:
## 
## kappa  [Fix]  : 1.000
## lambda [ ML]  : 0.474
##    lower bound : 0.000, p = 9.9143e-14
##    upper bound : 1.000, p = < 2.22e-16
##    95.0% CI   : (0.297, 0.629)
## delta  [Fix]  : 1.000
## 
## Coefficients:
##                             Estimate Std. Error t value  Pr(>|t|)    
## (Intercept)                -4.842380   0.509536 -9.5035 < 2.2e-16 ***
## log(tipRate)               -0.381034   0.078052 -4.8818 1.223e-06 ***
## log(BodyMassKg_notInputed) -0.118374   0.021318 -5.5528 3.600e-08 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.08714 on 1001 degrees of freedom
## Multiple R-squared: 0.05154, Adjusted R-squared: 0.04964 
## F-statistic:  27.2 on 2 and 1001 DF,  p-value: 3.153e-12
plot(modpirateBM)

Range Area

par(mfrow = c(2, 2))
summary(lm(log(EstPi) ~ log(geoArea_km2), data = datacomp$data))
## 
## Call:
## lm(formula = log(EstPi) ~ log(geoArea_km2), data = datacomp$data)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -3.8492 -0.6255  0.1365  0.7316  1.9273 
## 
## Coefficients:
##                  Estimate Std. Error t value Pr(>|t|)    
## (Intercept)      -5.06969    0.20325 -24.943  < 2e-16 ***
## log(geoArea_km2)  0.07857    0.01447   5.431 7.02e-08 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.9795 on 1002 degrees of freedom
## Multiple R-squared:  0.0286, Adjusted R-squared:  0.02763 
## F-statistic:  29.5 on 1 and 1002 DF,  p-value: 7.023e-08
# modpiArea <- pgls(log(EstPi) ~ log(geoArea_km2), data = datacomp, lambda =
# 'ML') saveRDS(modpiArea, paste0(folder_path,'pgls/extra/pglspiArea.rds'))
modpiArea <- readRDS(paste0(folder_path, "pgls/extra/pglspiArea.rds"))
summary(modpiArea)  ### positivelly correlated
## 
## Call:
## pgls(formula = log(EstPi) ~ log(geoArea_km2), data = datacomp, 
##     lambda = "ML")
## 
## Residuals:
##       Min        1Q    Median        3Q       Max 
## -0.301173 -0.061473 -0.000233  0.067502  0.255721 
## 
## Branch length transformations:
## 
## kappa  [Fix]  : 1.000
## lambda [ ML]  : 0.618
##    lower bound : 0.000, p = < 2.22e-16
##    upper bound : 1.000, p = < 2.22e-16
##    95.0% CI   : (0.476, 0.730)
## delta  [Fix]  : 1.000
## 
## Coefficients:
##                   Estimate Std. Error t value  Pr(>|t|)    
## (Intercept)      -5.643268   0.633533 -8.9076 < 2.2e-16 ***
## log(geoArea_km2)  0.123812   0.014847  8.3390  2.22e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.09801 on 1002 degrees of freedom
## Multiple R-squared: 0.0649,  Adjusted R-squared: 0.06396 
## F-statistic: 69.54 on 1 and 1002 DF,  p-value: 2.454e-16
plot(modpiArea)

summary(lm(log(tipRate) ~ log(geoArea_km2), data = datacomp$data))
## 
## Call:
## lm(formula = log(tipRate) ~ log(geoArea_km2), data = datacomp$data)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -1.87475 -0.28650 -0.00506  0.30714  1.48899 
## 
## Coefficients:
##                   Estimate Std. Error t value Pr(>|t|)    
## (Intercept)      -1.079662   0.103657  -10.42  < 2e-16 ***
## log(geoArea_km2) -0.029287   0.007378   -3.97 7.71e-05 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.4995 on 1002 degrees of freedom
## Multiple R-squared:  0.01548,    Adjusted R-squared:  0.0145 
## F-statistic: 15.76 on 1 and 1002 DF,  p-value: 7.714e-05
# modspRateArea <- pgls(log(tipRate) ~ log(geoArea_km2), data = datacomp,
# lambda = 'ML') saveRDS(modspRateArea,
# paste0(folder_path,'pgls/extra/pglsspRateArea.rds'))
modspRateArea <- readRDS(paste0(folder_path, "pgls/extra/pglsspRateArea.rds"))
summary(modspRateArea)  ### not correlated
## 
## Call:
## pgls(formula = log(tipRate) ~ log(geoArea_km2), data = datacomp, 
##     lambda = "ML")
## 
## Residuals:
##       Min        1Q    Median        3Q       Max 
## -0.270816 -0.045761 -0.006045  0.035765  0.240372 
## 
## Branch length transformations:
## 
## kappa  [Fix]  : 1.000
## lambda [ ML]  : 0.993
##    lower bound : 0.000, p = < 2.22e-16
##    upper bound : 1.000, p = < 2.22e-16
##    95.0% CI   : (0.990, 0.996)
## delta  [Fix]  : 1.000
## 
## Coefficients:
##                    Estimate Std. Error t value  Pr(>|t|)    
## (Intercept)      -2.4666147  0.4963273 -4.9697 7.881e-07 ***
## log(geoArea_km2) -0.0042257  0.0028760 -1.4693    0.1421    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.0679 on 1002 degrees of freedom
## Multiple R-squared: 0.00215, Adjusted R-squared: 0.001154 
## F-statistic: 2.159 on 1 and 1002 DF,  p-value: 0.1421
plot(modspRateArea)

summary(lm(log(EstPi) ~ log(tipRate) + log(geoArea_km2), data = datacomp$data))
## 
## Call:
## lm(formula = log(EstPi) ~ log(tipRate) + log(geoArea_km2), data = datacomp$data)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -3.6110 -0.6042  0.1416  0.7106  1.9175 
## 
## Coefficients:
##                  Estimate Std. Error t value Pr(>|t|)    
## (Intercept)      -5.45641    0.21048 -25.924  < 2e-16 ***
## log(tipRate)     -0.35819    0.06093  -5.878 5.64e-09 ***
## log(geoArea_km2)  0.06808    0.01434   4.747 2.36e-06 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.9635 on 1001 degrees of freedom
## Multiple R-squared:  0.06101,    Adjusted R-squared:  0.05914 
## F-statistic: 32.52 on 2 and 1001 DF,  p-value: 2.07e-14
# modpirateArea <- pgls(log(EstPi) ~ log(tipRate) + log(geoArea_km2), data =
# datacomp, lambda = 'ML') saveRDS(modpirateArea,
# paste0(folder_path,'pgls/extra/pglspirateArea.rds'))
modpirateArea <- readRDS(paste0(folder_path, "pgls/extra/pglspirateArea.rds"))
summary(modpirateArea)  ### as expected, spRate negatively correlated and geographic range positively
## 
## Call:
## pgls(formula = log(EstPi) ~ log(tipRate) + log(geoArea_km2), 
##     data = datacomp, lambda = "ML")
## 
## Residuals:
##       Min        1Q    Median        3Q       Max 
## -0.284495 -0.064448 -0.006408  0.054472  0.283588 
## 
## Branch length transformations:
## 
## kappa  [Fix]  : 1.000
## lambda [ ML]  : 0.563
##    lower bound : 0.000, p = < 2.22e-16
##    upper bound : 1.000, p = < 2.22e-16
##    95.0% CI   : (0.406, 0.693)
## delta  [Fix]  : 1.000
## 
## Coefficients:
##                   Estimate Std. Error  t value  Pr(>|t|)    
## (Intercept)      -6.245061   0.603871 -10.3417 < 2.2e-16 ***
## log(tipRate)     -0.308851   0.081035  -3.8113 0.0001467 ***
## log(geoArea_km2)  0.117460   0.014842   7.9141 6.661e-15 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.09243 on 1001 degrees of freedom
## Multiple R-squared: 0.07753, Adjusted R-squared: 0.07568 
## F-statistic: 42.06 on 2 and 1001 DF,  p-value: < 2.2e-16
plot(modpirateArea)

Mean Temperature

par(mfrow = c(2, 2))
summary(lm(log(EstPi) ~ log(mean_temp), data = datacomp$data))
## 
## Call:
## lm(formula = log(EstPi) ~ log(mean_temp), data = datacomp$data)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -4.1106 -0.6373  0.1544  0.7356  2.3511 
## 
## Coefficients:
##                Estimate Std. Error t value Pr(>|t|)    
## (Intercept)    -6.16377    0.46104  -13.37  < 2e-16 ***
## log(mean_temp)  0.37988    0.07997    4.75 2.33e-06 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.9828 on 1002 degrees of freedom
## Multiple R-squared:  0.02203,    Adjusted R-squared:  0.02105 
## F-statistic: 22.57 on 1 and 1002 DF,  p-value: 2.326e-06
# modpiTemp <- pgls(log(EstPi) ~ log(mean_temp), data = datacomp, lambda =
# 'ML') saveRDS(modpiTemp, paste0(folder_path,'pgls/extra/pglspiTemp.rds'))
modpiTemp <- readRDS(paste0(folder_path, "pgls/extra/pglspiTemp.rds"))
summary(modpiTemp)
## 
## Call:
## pgls(formula = log(EstPi) ~ log(mean_temp), data = datacomp, 
##     lambda = "ML")
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.35028 -0.06019  0.00117  0.06128  0.29070 
## 
## Branch length transformations:
## 
## kappa  [Fix]  : 1.000
## lambda [ ML]  : 0.555
##    lower bound : 0.000, p = < 2.22e-16
##    upper bound : 1.000, p = < 2.22e-16
##    95.0% CI   : (0.392, 0.687)
## delta  [Fix]  : 1.000
## 
## Coefficients:
##                 Estimate Std. Error t value  Pr(>|t|)    
## (Intercept)    -6.138505   0.767286 -8.0003 3.553e-15 ***
## log(mean_temp)  0.375371   0.090557  4.1452 3.683e-05 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.09466 on 1002 degrees of freedom
## Multiple R-squared: 0.01686, Adjusted R-squared: 0.01588 
## F-statistic: 17.18 on 1 and 1002 DF,  p-value: 3.683e-05
plot(modpiTemp)

summary(lm(log(tipRate) ~ log(mean_temp), data = datacomp$data))
## 
## Call:
## lm(formula = log(tipRate) ~ log(mean_temp), data = datacomp$data)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -1.86817 -0.27258 -0.00459  0.30248  1.57693 
## 
## Coefficients:
##                Estimate Std. Error t value Pr(>|t|)    
## (Intercept)    -0.31405    0.23323  -1.346    0.178    
## log(mean_temp) -0.20380    0.04046  -5.038 5.59e-07 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.4972 on 1002 degrees of freedom
## Multiple R-squared:  0.0247, Adjusted R-squared:  0.02373 
## F-statistic: 25.38 on 1 and 1002 DF,  p-value: 5.587e-07
# modspRateTemp <- pgls(log(tipRate) ~ log(mean_temp), data = datacomp, lambda
# = 'ML') saveRDS(modspRateTemp,
# paste0(folder_path,'pgls/extra/pglsspRateTemp.rds'))
modspRateTemp <- readRDS(paste0(folder_path, "pgls/extra/pglsspRateTemp.rds"))
summary(modspRateTemp)
## 
## Call:
## pgls(formula = log(tipRate) ~ log(mean_temp), data = datacomp, 
##     lambda = "ML")
## 
## Residuals:
##       Min        1Q    Median        3Q       Max 
## -0.266146 -0.043390 -0.002865  0.040920  0.240823 
## 
## Branch length transformations:
## 
## kappa  [Fix]  : 1.000
## lambda [ ML]  : 0.993
##    lower bound : 0.000, p = < 2.22e-16
##    upper bound : 1.000, p = < 2.22e-16
##    95.0% CI   : (0.990, 0.996)
## delta  [Fix]  : 1.000
## 
## Coefficients:
##                 Estimate Std. Error t value  Pr(>|t|)    
## (Intercept)    -2.387185   0.505741 -4.7202 2.691e-06 ***
## log(mean_temp) -0.023511   0.017799 -1.3209    0.1868    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.06792 on 1002 degrees of freedom
## Multiple R-squared: 0.001738,    Adjusted R-squared: 0.000742 
## F-statistic: 1.745 on 1 and 1002 DF,  p-value: 0.1868
plot(modspRateTemp)

summary(lm(log(EstPi) ~ log(tipRate) + log(mean_temp), data = datacomp$data))
## 
## Call:
## lm(formula = log(EstPi) ~ log(tipRate) + log(mean_temp), data = datacomp$data)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -4.0700 -0.5911  0.1573  0.7200  2.0265 
## 
## Coefficients:
##                Estimate Std. Error t value Pr(>|t|)    
## (Intercept)    -6.27587    0.45410 -13.821  < 2e-16 ***
## log(tipRate)   -0.35696    0.06145  -5.809 8.45e-09 ***
## log(mean_temp)  0.30714    0.07968   3.854 0.000123 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.9671 on 1001 degrees of freedom
## Multiple R-squared:  0.05392,    Adjusted R-squared:  0.05203 
## F-statistic: 28.52 on 2 and 1001 DF,  p-value: 8.969e-13
# modpirateTemp <- pgls(log(EstPi) ~ log(tipRate) + log(mean_temp), data =
# datacomp, lambda = 'ML') saveRDS(modpirateTemp,
# paste0(folder_path,'pgls/extra/pglspirateTemp.rds'))
modpirateTemp <- readRDS(paste0(folder_path, "pgls/extra/pglspirateTemp.rds"))
summary(modpirateTemp)
## 
## Call:
## pgls(formula = log(EstPi) ~ log(tipRate) + log(mean_temp), data = datacomp, 
##     lambda = "ML")
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.26611 -0.05683  0.00251  0.05509  0.31879 
## 
## Branch length transformations:
## 
## kappa  [Fix]  : 1.000
## lambda [ ML]  : 0.475
##    lower bound : 0.000, p = < 2.22e-16
##    upper bound : 1.000, p = < 2.22e-16
##    95.0% CI   : (0.300, 0.631)
## delta  [Fix]  : 1.000
## 
## Coefficients:
##                 Estimate Std. Error t value  Pr(>|t|)    
## (Intercept)    -6.709641   0.722235 -9.2901 < 2.2e-16 ***
## log(tipRate)   -0.349087   0.079102 -4.4131  1.13e-05 ***
## log(mean_temp)  0.343797   0.089642  3.8352 0.0001333 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.08792 on 1001 degrees of freedom
## Multiple R-squared: 0.03644, Adjusted R-squared: 0.03452 
## F-statistic: 18.93 on 2 and 1001 DF,  p-value: 8.534e-09
plot(modpirateTemp)

Litter size

par(mfrow = c(2, 2))
summary(lm(log(EstPi) ~ log(litter_or_clutch_size_n), data = datacomp$data))
## 
## Call:
## lm(formula = log(EstPi) ~ log(litter_or_clutch_size_n), data = datacomp$data)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -4.1145 -0.6182  0.1446  0.7472  1.9798 
## 
## Coefficients:
##                              Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                  -3.96951    0.05139 -77.245   <2e-16 ***
## log(litter_or_clutch_size_n) -0.01034    0.04621  -0.224    0.823    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.9937 on 1002 degrees of freedom
## Multiple R-squared:  4.996e-05,  Adjusted R-squared:  -0.000948 
## F-statistic: 0.05007 on 1 and 1002 DF,  p-value: 0.823
# modpiLitter <- pgls(log(EstPi) ~ log(litter_or_clutch_size_n), data =
# datacomp, lambda = 'ML') saveRDS(modpiLitter,
# paste0(folder_path,'pgls/extra/pglspiLitter.rds'))
modpiLitter <- readRDS(paste0(folder_path, "pgls/extra/pglspiLitter.rds"))
summary(modpiLitter)
## 
## Call:
## pgls(formula = log(EstPi) ~ log(litter_or_clutch_size_n), data = datacomp, 
##     lambda = "ML")
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.30632 -0.06846 -0.00638  0.05812  0.32566 
## 
## Branch length transformations:
## 
## kappa  [Fix]  : 1.000
## lambda [ ML]  : 0.583
##    lower bound : 0.000, p = < 2.22e-16
##    upper bound : 1.000, p = < 2.22e-16
##    95.0% CI   : (0.431, 0.705)
## delta  [Fix]  : 1.000
## 
## Coefficients:
##                               Estimate Std. Error t value  Pr(>|t|)    
## (Intercept)                  -3.752108   0.590230 -6.3570 3.119e-10 ***
## log(litter_or_clutch_size_n) -0.212593   0.090264 -2.3552    0.0187 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.09766 on 1002 degrees of freedom
## Multiple R-squared: 0.005506,    Adjusted R-squared: 0.004513 
## F-statistic: 5.547 on 1 and 1002 DF,  p-value: 0.0187
plot(modpiLitter)

summary(lm(log(tipRate) ~ log(litter_or_clutch_size_n), data = datacomp$data))
## 
## Call:
## lm(formula = log(tipRate) ~ log(litter_or_clutch_size_n), data = datacomp$data)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -1.87249 -0.28288 -0.01363  0.30216  1.58562 
## 
## Coefficients:
##                              Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                  -1.47395    0.02603 -56.628   <2e-16 ***
## log(litter_or_clutch_size_n) -0.01407    0.02341  -0.601    0.548    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.5033 on 1002 degrees of freedom
## Multiple R-squared:  0.0003604,  Adjusted R-squared:  -0.0006372 
## F-statistic: 0.3613 on 1 and 1002 DF,  p-value: 0.5479
# modspRateLitter <- pgls(log(tipRate) ~ log(litter_or_clutch_size_n), data =
# datacomp, lambda = 'ML') saveRDS(modspRateLitter, paste0(folder_path,
# 'pgls/extra/pglsspRateLitter.rds'))
modspRateLitter <- readRDS(paste0(folder_path, "pgls/extra/pglsspRateLitter.rds"))
summary(modspRateLitter)
## 
## Call:
## pgls(formula = log(tipRate) ~ log(litter_or_clutch_size_n), data = datacomp, 
##     lambda = "ML")
## 
## Residuals:
##       Min        1Q    Median        3Q       Max 
## -0.274798 -0.041920 -0.001218  0.041449  0.256386 
## 
## Branch length transformations:
## 
## kappa  [Fix]  : 1.000
## lambda [ ML]  : 0.993
##    lower bound : 0.000, p = < 2.22e-16
##    upper bound : 1.000, p = < 2.22e-16
##    95.0% CI   : (0.991, 0.996)
## delta  [Fix]  : 1.000
## 
## Coefficients:
##                               Estimate Std. Error t value  Pr(>|t|)    
## (Intercept)                  -2.564613   0.495743 -5.1733 2.778e-07 ***
## log(litter_or_clutch_size_n)  0.046781   0.027425  1.7058   0.08836 .  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.06796 on 1002 degrees of freedom
## Multiple R-squared: 0.002895,    Adjusted R-squared: 0.0019 
## F-statistic:  2.91 on 1 and 1002 DF,  p-value: 0.08836
plot(modspRateLitter)

summary(lm(log(EstPi) ~ log(tipRate) + log(litter_or_clutch_size_n), data = datacomp$data))
## 
## Call:
## lm(formula = log(EstPi) ~ log(tipRate) + log(litter_or_clutch_size_n), 
##     data = datacomp$data)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -4.0685 -0.5923  0.1510  0.7406  1.9176 
## 
## Coefficients:
##                              Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                  -4.55111    0.10325 -44.080  < 2e-16 ***
## log(tipRate)                 -0.39459    0.06114  -6.454  1.7e-10 ***
## log(litter_or_clutch_size_n) -0.01589    0.04531  -0.351    0.726    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.9742 on 1001 degrees of freedom
## Multiple R-squared:  0.03999,    Adjusted R-squared:  0.03807 
## F-statistic: 20.85 on 2 and 1001 DF,  p-value: 1.344e-09
# modpirateLitter <- pgls(log(EstPi) ~ log(tipRate) +
# log(litter_or_clutch_size_n), data = datacomp, lambda = 'ML')
# saveRDS(modpirateLitter,
# paste0(folder_path,'pgls/extra/pglspirateLitter.rds'))
modpirateLitter <- readRDS(paste0(folder_path, "pgls/extra/pglspirateLitter.rds"))
summary(modpirateLitter)
## 
## Call:
## pgls(formula = log(EstPi) ~ log(tipRate) + log(litter_or_clutch_size_n), 
##     data = datacomp, lambda = "ML")
## 
## Residuals:
##       Min        1Q    Median        3Q       Max 
## -0.307010 -0.060652 -0.004758  0.057641  0.253436 
## 
## Branch length transformations:
## 
## kappa  [Fix]  : 1.000
## lambda [ ML]  : 0.500
##    lower bound : 0.000, p = < 2.22e-16
##    upper bound : 1.000, p = < 2.22e-16
##    95.0% CI   : (0.329, 0.647)
## delta  [Fix]  : 1.000
## 
## Coefficients:
##                               Estimate Std. Error t value  Pr(>|t|)    
## (Intercept)                  -4.581298   0.543776 -8.4250 < 2.2e-16 ***
## log(tipRate)                 -0.369008   0.080211 -4.6005 4.755e-06 ***
## log(litter_or_clutch_size_n) -0.184746   0.086085 -2.1461   0.03211 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.09005 on 1001 degrees of freedom
## Multiple R-squared: 0.02607, Adjusted R-squared: 0.02413 
## F-statistic:  13.4 on 2 and 1001 DF,  p-value: 1.808e-06
plot(modpirateLitter)

Check results are consistent if excluding species with only size litter of 1

par(mfrow = c(2, 2))
# DataSubsetMCC1 <- gendivSpRateTrait %>% filter(set %in% 'treeMCC',
# litter_or_clutch_size_n > 1) %>% select(species, clades, tipRate, EstPi,
# BodyMassKg_notInputed, geoArea_km2, mean_temp, litter_or_clutch_size_n,
# GenerationLength_d) %>% drop_na() t1 <- TreeSet[['treeMCC']] TreeSubset1 <-
# drop.tip(treeMCC, as.character(t1$tip.label[which(!t1$tip.label %in%
# DataSubsetMCC1$species)])) datacomp1 <- comparative.data(data =
# DataSubsetMCC1, phy = TreeSubset1, names.col = 'species', vcv = TRUE, na.omit
# = TRUE, warn.dropped = TRUE) summary(lm(log(EstPi) ~
# log(litter_or_clutch_size_n), data = datacomp1$data)) modpiLitter1 <-
# pgls(log(EstPi) ~ log(litter_or_clutch_size_n), data = datacomp1, lambda =
# 'ML') saveRDS(modpiLitter1,
# paste0(folder_path,'pgls/extra/pglspiLitter1.rds'))
modpiLitter1 <- readRDS(paste0(folder_path, "pgls/extra/pglspiLitter1.rds"))
summary(modpiLitter1)
## 
## Call:
## pgls(formula = log(EstPi) ~ log(litter_or_clutch_size_n), data = datacomp1, 
##     lambda = "ML")
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.25613 -0.05330  0.00140  0.05993  0.36957 
## 
## Branch length transformations:
## 
## kappa  [Fix]  : 1.000
## lambda [ ML]  : 0.523
##    lower bound : 0.000, p = < 2.22e-16
##    upper bound : 1.000, p = < 2.22e-16
##    95.0% CI   : (0.349, 0.673)
## delta  [Fix]  : 1.000
## 
## Coefficients:
##                               Estimate Std. Error t value  Pr(>|t|)    
## (Intercept)                  -3.624023   0.543999 -6.6618 5.136e-11 ***
## log(litter_or_clutch_size_n) -0.238154   0.097245 -2.4490   0.01455 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.09275 on 773 degrees of freedom
## Multiple R-squared: 0.007699,    Adjusted R-squared: 0.006415 
## F-statistic: 5.998 on 1 and 773 DF,  p-value: 0.01455
plot(modpiLitter1)

Generation length

par(mfrow = c(2, 2))
summary(lm(log(EstPi) ~ log(GenerationLength_d), data = datacomp$data))
## 
## Call:
## lm(formula = log(EstPi) ~ log(GenerationLength_d), data = datacomp$data)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -4.0668 -0.6219  0.1339  0.7352  1.9834 
## 
## Coefficients:
##                         Estimate Std. Error t value Pr(>|t|)    
## (Intercept)             -2.48262    0.28743  -8.637  < 2e-16 ***
## log(GenerationLength_d) -0.21058    0.04022  -5.235 2.01e-07 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.9804 on 1002 degrees of freedom
## Multiple R-squared:  0.02662,    Adjusted R-squared:  0.02565 
## F-statistic: 27.41 on 1 and 1002 DF,  p-value: 2.008e-07
# modpigenLengh <- pgls(log(EstPi) ~ log(GenerationLength_d), data = datacomp,
# lambda = 'ML') saveRDS(modpigenLengh,
# paste0(folder_path,'pgls/extra/pglspigenLengh.rds'))
modpigenLengh <- readRDS(paste0(folder_path, "pgls/extra/pglspigenLengh.rds"))
summary(modpigenLengh)
## 
## Call:
## pgls(formula = log(EstPi) ~ log(GenerationLength_d), data = datacomp, 
##     lambda = "ML")
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.32590 -0.06920 -0.00856  0.05637  0.33915 
## 
## Branch length transformations:
## 
## kappa  [Fix]  : 1.000
## lambda [ ML]  : 0.568
##    lower bound : 0.000, p = < 2.22e-16
##    upper bound : 1.000, p = < 2.22e-16
##    95.0% CI   : (0.412, 0.695)
## delta  [Fix]  : 1.000
## 
## Coefficients:
##                          Estimate Std. Error t value Pr(>|t|)   
## (Intercept)             -2.325281   0.878666 -2.6464 0.008263 **
## log(GenerationLength_d) -0.221501   0.091324 -2.4254 0.015465 * 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.09633 on 1002 degrees of freedom
## Multiple R-squared: 0.005837,    Adjusted R-squared: 0.004845 
## F-statistic: 5.883 on 1 and 1002 DF,  p-value: 0.01547
plot(modpigenLengh)

summary(lm(log(tipRate) ~ log(GenerationLength_d), data = datacomp$data))
## 
## Call:
## lm(formula = log(tipRate) ~ log(GenerationLength_d), data = datacomp$data)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -1.91217 -0.27645 -0.01196  0.29220  1.61260 
## 
## Coefficients:
##                         Estimate Std. Error t value Pr(>|t|)    
## (Intercept)             -1.83549    0.14717 -12.472   <2e-16 ***
## log(GenerationLength_d)  0.04915    0.02060   2.386   0.0172 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.502 on 1002 degrees of freedom
## Multiple R-squared:  0.005651,   Adjusted R-squared:  0.004659 
## F-statistic: 5.694 on 1 and 1002 DF,  p-value: 0.0172
# modSpRategenLengh <- pgls(log(tipRate) ~ log(GenerationLength_d), data =
# datacomp, lambda = 'ML') saveRDS(modSpRategenLengh, paste0(folder_path,
# 'pgls/extra/pglsSpRategenLengh.rds'))
modSpRategenLengh <- readRDS(paste0(folder_path, "pgls/extra/pglsSpRategenLengh.rds"))
summary(modSpRategenLengh)
## 
## Call:
## pgls(formula = log(tipRate) ~ log(GenerationLength_d), data = datacomp, 
##     lambda = "ML")
## 
## Residuals:
##       Min        1Q    Median        3Q       Max 
## -0.269637 -0.045360 -0.003921  0.036390  0.245839 
## 
## Branch length transformations:
## 
## kappa  [Fix]  : 1.000
## lambda [ ML]  : 0.993
##    lower bound : 0.000, p = < 2.22e-16
##    upper bound : 1.000, p = < 2.22e-16
##    95.0% CI   : (0.990, 0.996)
## delta  [Fix]  : 1.000
## 
## Coefficients:
##                           Estimate Std. Error t value  Pr(>|t|)    
## (Intercept)             -2.4769954  0.5378322 -4.6055 4.644e-06 ***
## log(GenerationLength_d) -0.0064197  0.0282129 -0.2275      0.82    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.06797 on 1002 degrees of freedom
## Multiple R-squared: 5.167e-05,   Adjusted R-squared: -0.0009463 
## F-statistic: 0.05178 on 1 and 1002 DF,  p-value: 0.82
plot(modSpRategenLengh)

summary(lm(log(EstPi) ~ log(tipRate) + log(GenerationLength_d), data = datacomp$data))
## 
## Call:
## lm(formula = log(EstPi) ~ log(tipRate) + log(GenerationLength_d), 
##     data = datacomp$data)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -4.0216 -0.6083  0.1065  0.7012  2.0922 
## 
## Coefficients:
##                         Estimate Std. Error t value Pr(>|t|)    
## (Intercept)             -3.16556    0.30343 -10.433  < 2e-16 ***
## log(tipRate)            -0.37207    0.06060  -6.140 1.19e-09 ***
## log(GenerationLength_d) -0.19230    0.03962  -4.854 1.41e-06 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.963 on 1001 degrees of freedom
## Multiple R-squared:  0.06195,    Adjusted R-squared:  0.06008 
## F-statistic: 33.05 on 2 and 1001 DF,  p-value: 1.256e-14
# modpiSpRategenLengh <- pgls(log(EstPi) ~ log(tipRate) +
# log(GenerationLength_d), data = datacomp, lambda = 'ML')
# saveRDS(modpiSpRategenLengh, paste0(folder_path,
# 'pgls/extra/pglspiSpRategenLengh.rds'))
modpiSpRategenLengh <- readRDS(paste0(folder_path, "pgls/extra/pglspiSpRategenLengh.rds"))
summary(modpiSpRategenLengh)
## 
## Call:
## pgls(formula = log(EstPi) ~ log(tipRate) + log(GenerationLength_d), 
##     data = datacomp, lambda = "ML")
## 
## Residuals:
##       Min        1Q    Median        3Q       Max 
## -0.262485 -0.053856  0.000991  0.057396  0.304842 
## 
## Branch length transformations:
## 
## kappa  [Fix]  : 1.000
## lambda [ ML]  : 0.475
##    lower bound : 0.000, p = 1.2212e-15
##    upper bound : 1.000, p = < 2.22e-16
##    95.0% CI   : (0.302, 0.628)
## delta  [Fix]  : 1.000
## 
## Coefficients:
##                          Estimate Std. Error t value Pr(>|t|)    
## (Intercept)             -3.105725   0.808492 -3.8414 0.000130 ***
## log(tipRate)            -0.383618   0.079024 -4.8545  1.4e-06 ***
## log(GenerationLength_d) -0.229234   0.085955 -2.6669 0.007779 ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.08823 on 1001 degrees of freedom
## Multiple R-squared: 0.02919, Adjusted R-squared: 0.02725 
## F-statistic: 15.05 on 2 and 1001 DF,  p-value: 3.634e-07
plot(modpiSpRategenLengh)

##### maybe should use a different variable for longevity

dt <- read.table("/Users/acas/Dropbox/Post-docs/Morlon_Lab/manuscript_projects_info/mammals_genDiv_SpRate/manuscript_scripts_data/traits/input/GenerationLenghtMammals_Pacifici2013.txt",
    header = T, sep = "\t", stringsAsFactors = F) %>%
    mutate(species = str_replace(Scientific_name, " ", "_")) %>%
    mutate_at(vars(ends_with("_d")), as.numeric) %>%
    right_join(select(gen.div, species, EstPi)) %>%
    drop_na(Genus)

a <- boxplot(log(na.omit(dt$Rspan_d)))
out <- dt[log(na.omit(dt$Rspan_d)) %in% a$out, "species"]

DataSubset <- dt %>%
    select(species, EstPi, Rspan_d) %>%
    filter(!duplicated(species), !species %in% out) %>%
    drop_na()

TreeSubset <- drop.tip(treeMCC, as.character(treeMCC$tip.label[which(!treeMCC$tip.label %in%
    DataSubset$species)]))

datacompGlobalTraits <- comparative.data(data = DataSubset, phy = TreeSubset, names.col = "species",
    vcv = TRUE, na.omit = TRUE, warn.dropped = TRUE)

mod <- pgls(log(EstPi) ~ log(Rspan_d), data = datacompGlobalTraits, lambda = "ML")

summary(mod)
##### Rspan_d negative sign - species reproductive life span - longevity is
##### inversily related with genetic diversity


a <- boxplot(log(na.omit(dt$AFR_d)))

out <- dt[log(na.omit(dt$AFR_d)) %in% a$out, "species"]

DataSubset <- dt %>%
    select(species, EstPi, AFR_d) %>%
    filter(!duplicated(species), !species %in% out) %>%
    drop_na()

TreeSubset <- drop.tip(treeMCC, as.character(treeMCC$tip.label[which(!treeMCC$tip.label %in%
    DataSubset$species)]))

datacompGlobalTraits <- comparative.data(data = DataSubset, phy = TreeSubset, names.col = "species",
    vcv = TRUE, na.omit = TRUE, warn.dropped = TRUE)

mod <- pgls(log(EstPi) ~ log(AFR_d), data = datacompGlobalTraits, lambda = "ML")

summary(mod)
##### AFR_d (age at first reproduction) is negative sign

#############################

a <- boxplot(log(na.omit(dt$Calculated_GL_d)))

out <- dt[log(na.omit(dt$Calculated_GL_d)) %in% a$out, "species"]

DataSubset <- dt %>%
    select(species, EstPi, Calculated_GL_d) %>%
    filter(!duplicated(species), !species %in% out) %>%
    drop_na()

TreeSubset <- drop.tip(treeMCC, as.character(treeMCC$tip.label[which(!treeMCC$tip.label %in%
    DataSubset$species)]))

datacompGlobalTraits <- comparative.data(data = DataSubset, phy = TreeSubset, names.col = "species",
    vcv = TRUE, na.omit = TRUE, warn.dropped = TRUE)

mod <- pgls(log(EstPi) ~ log(Calculated_GL_d), data = datacompGlobalTraits, lambda = "ML")

summary(mod)

##### Calculated_GL_d (generation length) is negative sign too, this is the
##### average age of parents of the current cohort, reflecting the turnover
##### rate of breeding individuals in a population. This measure is estimated
##### from reproductive life span and age at first reproduction for most
##### species and input with body size for other species without these
##### measures...
### Body mass considered as a control can't compare models fitted with branch
### length transformations
modpiBM2 <- pgls(log(EstPi) ~ log(BodyMassKg_notInputed), data = datacomp)
modpiSpRateBM <- pgls(log(EstPi) ~ log(tipRate) + log(BodyMassKg_notInputed), data = datacomp)
modpiSpRateTraits2 <- pgls(log(EstPi) ~ log(tipRate) + log(BodyMassKg_notInputed) +
    log(geoArea_km2) + log(mean_temp) + log(litter_or_clutch_size_n) + log(GenerationLength_d),
    data = datacomp)
modpiSpRateTraitsnoBM <- pgls(log(EstPi) ~ log(tipRate) + log(geoArea_km2) + log(mean_temp) +
    log(litter_or_clutch_size_n) + log(GenerationLength_d), data = datacomp)
anova(modpiBM2, modpiSpRateBM, modpiSpRateTraits2, modpiSpRateTraitsnoBM, test = "F")

modpiSpRate2 <- pgls(log(EstPi) ~ log(tipRate), data = datacomp)
anova(modpiSpRateBM, modpiSpRate2, test = "F")

modpiSpRateLong <- pgls(log(EstPi) ~ log(tipRate) + log(GenerationLength_d), data = datacomp)

modpiSpRateLongBM <- pgls(log(EstPi) ~ log(tipRate) + log(GenerationLength_d) + log(BodyMassKg_notInputed),
    data = datacomp)

anova(modpiSpRate2, modpiSpRateLong, modpiSpRateLongBM, test = "F")

Compare clades that were significantly correlated vs non correlated

Are species from clades with significant correlation pi~SpRate significantly different from species from clades without correlation for all variables?

sigClades <- mgsubendivSpRateMutRate %>%
    left_join(., traitData[-7], by = "species")  #don't include dispersal

sigClades$signif <- ifelse(sigClades$clades %in% c("Eulipotyphla", "GuineaPigRelated",
    "Lagomorpha", "Muridae", "PhyllostomidRelated", "SquirrelRelated", "Yinpterochiroptera"),
    "signif", "nonSignif")
sigClades <- left_join(sigClades, unique(MutRateAll[, c("species", "mutclade")]),
    by = "species")

## with MCC rates
sigClades2 <- left_join(sigClades, select(filter(gendivSpRateMutRate, set %in% "treeMCC"),
    species, tipRate, mutRate, Ne, time)) %>%
    rename(SpRate_MCC = tipRate, Ne_MCC = Ne, MutRate_MCC = mutRate)

sigClades3 <- filter(gendivSpRateMutRate, set %in% "treeMCC") %>%
    mutate(signif = case_when(clades %in% c("Eulipotyphla", "GuineaPigRelated", "Lagomorpha",
        "Muridae", "PhyllostomidRelated", "SquirrelRelated", "Yinpterochiroptera") ~
        "signif", TRUE ~ "nonSignif")) %>%
    rename(SpRate_MCC = tipRate, MutRate_MCC = mutRate, Ne_MCC = Ne) %>%
    drop_na(EstPi, expNsub)

msigClades <- sigClades3 %>%
    pivot_longer(cols = c("EstPi", "SpRate_MCC", "BodyMassKg_notInputed", "geoArea_km2",
        "mean_temp", "GenerationLength_d", "litter_or_clutch_size_n", "Ne_MCC", "MutRate_MCC"))
# ggplot(msigClades) + geom_boxplot(aes(y = value, x = signif)) +
# facet_wrap(~name, scales = 'free_y') + scale_y_log10()

correlComp <- ggboxplot(msigClades, y = "value", x = "signif", color = "signif",
    add = "jitter", add.params = list(size = 0.5, alpha = 0.2, color = "gray70")) +
    facet_wrap(~fct_inorder(name), scales = "free_y", ncol = 3) + scale_y_log10() +
    theme_bw() + stat_compare_means(na.rm = T) + labs(x = "Comparison between clades with correlation and without")

correlComp

ggsave(paste0(fig_path, "other/comparisonCorrelatedVsNot_MCC.pdf"), correlComp, width = 12,
    height = 8, useDingbats = FALSE)

Is there a difference in the Ne/mutRate relationships with traits when comparing species from clades that were statistically significant?

out <- boxplot(log(sigClades2$MutRate_mean),plot = F)$out
exclude <- pull(sigClades2[log(sigClades2$MutRate_mean) %in% out,],species)

mutTraits <- sigClades2 %>%
  filter(mean_temp > 10) %>%  #### one data point below 10 that make the plot hard to see
  pivot_longer(cols = c('BodyMassKg_notInputed','geoArea_km2','mean_temp','litter_or_clutch_size_n','GenerationLength_d')) %>%
  ggplot( aes(x = value, y = MutRate_mean, color = signif)) + 
  geom_point() + 
  geom_smooth(method = "lm") + 
  facet_wrap(~fct_inorder(name), scales = 'free', ncol = 1) +
  scale_y_log10() + scale_x_log10() +
  theme_bw() 

NeTraits <- sigClades2 %>%
  filter(mean_temp > 10) %>%
  pivot_longer(cols = c('BodyMassKg_notInputed','geoArea_km2','mean_temp','litter_or_clutch_size_n','GenerationLength_d')) %>%
  ggplot( aes(x = value, y = Ne_mean, color = signif)) + 
  geom_point() + 
  geom_smooth(method = "lm") + 
  facet_wrap(~fct_inorder(name), scales = 'free', ncol = 1) +
  scale_y_log10() + scale_x_log10() +
  theme_bw() 

ggarrange(mutTraits, NeTraits, ncol = 2, common.legend = T, legend = "top")


Tests with mutation rate/Ne

Comparison between mutation rate and Ne relationship with the five tested traits:

Including the linear regression with values from MCC and mean rates

mutTraits <- sigClades2 %>%
  filter(mean_temp > 10) %>%  ## one data point below 10 that makes the figure harder to understand
  pivot_longer(cols = c('BodyMassKg_notInputed','geoArea_km2','mean_temp','litter_or_clutch_size_n','GenerationLength_d')) %>%
  ggplot( aes(x = value, y = MutRate_mean)) + 
  geom_point() + 
  geom_smooth(method = "lm") + 
  geom_smooth(aes(x = value,
                  y = MutRate_MCC), method = "lm", color = "red") + 
  facet_wrap(~fct_inorder(name), scales = 'free', ncol = 1) +
  scale_y_log10() + scale_x_log10() +
  theme_bw() + labs(caption = "Regression Line using MCC rates in red")

NeTraits <- sigClades2 %>%
  filter(mean_temp > 10) %>%
  pivot_longer(cols = c('BodyMassKg_notInputed','geoArea_km2','mean_temp','litter_or_clutch_size_n','GenerationLength_d')) %>%
  ggplot( aes(x = value, y = Ne_mean)) + 
  geom_point() + 
  geom_smooth(method = "lm") + 
  geom_smooth(aes(x = value,
                  y = Ne_MCC), method = "lm", color = "red") + 
  facet_wrap(~fct_inorder(name), scales = 'free', ncol = 1) +
  scale_y_log10() + scale_x_log10() +
  theme_bw() + labs(caption = "Regression Line using MCC rates in red")

ggarrange(mutTraits, NeTraits, ncol = 2)


PGLS Ne vs mutRate correlations

Body mass

MCCNeMutKg <- sigClades2 %>%
    select(species, Ne_MCC, MutRate_MCC, BodyMassKg_notInputed) %>%
    drop_na()

pl1 <- ggplot(MCCNeMutKg, aes(y = Ne_MCC, x = BodyMassKg_notInputed)) + geom_point() +
    geom_smooth(method = "lm") + scale_x_log10() + scale_y_log10()
pl2 <- ggplot(MCCNeMutKg, aes(y = MutRate_MCC, x = BodyMassKg_notInputed)) + geom_point() +
    geom_smooth(method = "lm") + scale_x_log10() + scale_y_log10()

pl1 | pl2

# treeMCC <- TreeSet[['treeMCC']] TreeSubset <- drop.tip(treeMCC,
# as.character(treeMCC$tip.label[which(!treeMCC$tip.label %in%
# MCCNeMutKg$species)])) datacomp <- comparative.data(data =
# data.frame(MCCNeMutKg), phy = TreeSubset, names.col = 'species', vcv = TRUE,
# na.omit = TRUE, warn.dropped = TRUE) modMCCNeKg <- pgls(log(Ne_MCC) ~
# log(BodyMassKg_notInputed), data = datacomp, lambda = 'ML')
# saveRDS(modMCCNeKg, paste0(folder_path,'pgls/extra/modMCCNeKg.rds'))
# modMCCMutKg <- pgls(log(MutRate_MCC) ~ log(BodyMassKg_notInputed), data =
# datacomp, lambda = 'ML') saveRDS(modMCCMutKg,
# paste0(folder_path,'pgls/extra/modMCCMutKg.rds'))

modMCCNeKg <- readRDS(paste0(folder_path, "pgls/extra/modMCCNeKg.rds"))
summary(modMCCNeKg)
## 
## Call:
## pgls(formula = log(Ne_MCC) ~ log(BodyMassKg_notInputed), data = datacomp, 
##     lambda = "ML")
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.54427 -0.08479 -0.00518  0.07606  0.40527 
## 
## Branch length transformations:
## 
## kappa  [Fix]  : 1.000
## lambda [ ML]  : 0.546
##    lower bound : 0.000, p = < 2.22e-16
##    upper bound : 1.000, p = < 2.22e-16
##    95.0% CI   : (0.422, 0.658)
## delta  [Fix]  : 1.000
## 
## Coefficients:
##                             Estimate Std. Error t value  Pr(>|t|)    
## (Intercept)                11.527148   0.653869 17.6291 < 2.2e-16 ***
## log(BodyMassKg_notInputed) -0.270275   0.027073 -9.9832 < 2.2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.1253 on 1356 degrees of freedom
## Multiple R-squared: 0.06847, Adjusted R-squared: 0.06778 
## F-statistic: 99.67 on 1 and 1356 DF,  p-value: < 2.2e-16
modMCCMutKg <- readRDS(paste0(folder_path, "pgls/extra/modMCCMutKg.rds"))
summary(modMCCMutKg)
## 
## Call:
## pgls(formula = log(MutRate_MCC) ~ log(BodyMassKg_notInputed), 
##     data = datacomp, lambda = "ML")
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.37265 -0.04992  0.00129  0.04837  0.37575 
## 
## Branch length transformations:
## 
## kappa  [Fix]  : 1.000
## lambda [ ML]  : 0.630
##    lower bound : 0.000, p = < 2.22e-16
##    upper bound : 1.000, p = < 2.22e-16
##    95.0% CI   : (0.537, 0.711)
## delta  [Fix]  : 1.000
## 
## Coefficients:
##                              Estimate Std. Error  t value  Pr(>|t|)    
## (Intercept)                -15.510063   0.460287 -33.6965 < 2.2e-16 ***
## log(BodyMassKg_notInputed)   0.127002   0.017389   7.3034 4.767e-13 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.08233 on 1356 degrees of freedom
## Multiple R-squared: 0.03785, Adjusted R-squared: 0.03714 
## F-statistic: 53.34 on 1 and 1356 DF,  p-value: 4.767e-13

Range Area

MCCNeMutArea <- sigClades2 %>%
    select(species, Ne_MCC, MutRate_MCC, geoArea_km2) %>%
    drop_na()

pl1 <- ggplot(MCCNeMutArea, aes(y = Ne_MCC, x = geoArea_km2)) + geom_point() + geom_smooth(method = "lm") +
    scale_x_log10() + scale_y_log10()
pl2 <- ggplot(MCCNeMutArea, aes(y = MutRate_MCC, x = geoArea_km2)) + geom_point() +
    geom_smooth(method = "lm") + scale_x_log10() + scale_y_log10()

pl1 | pl2

# treeMCC <- TreeSet[['treeMCC']] TreeSubset <- drop.tip(treeMCC,
# as.character(treeMCC$tip.label[which(!treeMCC$tip.label %in%
# MCCNeMutArea$species)])) datacomp <- comparative.data(data =
# data.frame(MCCNeMutArea), phy = TreeSubset, names.col = 'species', vcv =
# TRUE, na.omit = TRUE, warn.dropped = TRUE) modMCCNeArea <- pgls(log(Ne_MCC) ~
# log(geoArea_km2), data = datacomp, lambda = 'ML') saveRDS(modMCCNeArea,
# paste0(folder_path,'pgls/extra/modMCCNeArea.rds')) modMCCMutArea <-
# pgls(log(MutRate_MCC) ~ log(geoArea_km2), data = datacomp, lambda = 'ML')
# saveRDS(modMCCMutArea, paste0(folder_path,'pgls/extra/modMCCMutArea.rds'))

modMCCNeArea <- readRDS(paste0(folder_path, "pgls/extra/modMCCNeArea.rds"))
summary(modMCCNeArea)
## 
## Call:
## pgls(formula = log(Ne_MCC) ~ log(geoArea_km2), data = datacomp, 
##     lambda = "ML")
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.48992 -0.10490 -0.00871  0.09461  0.49095 
## 
## Branch length transformations:
## 
## kappa  [Fix]  : 1.000
## lambda [ ML]  : 0.721
##    lower bound : 0.000, p = < 2.22e-16
##    upper bound : 1.000, p = < 2.22e-16
##    95.0% CI   : (0.637, 0.789)
## delta  [Fix]  : 1.000
## 
## Coefficients:
##                   Estimate Std. Error t value  Pr(>|t|)    
## (Intercept)      10.143670   0.922281 10.9985 < 2.2e-16 ***
## log(geoArea_km2)  0.126434   0.014192  8.9089 < 2.2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.1514 on 1353 degrees of freedom
## Multiple R-squared: 0.05541, Adjusted R-squared: 0.05471 
## F-statistic: 79.37 on 1 and 1353 DF,  p-value: < 2.2e-16
modMCCMutArea <- readRDS(paste0(folder_path, "pgls/extra/modMCCMutArea.rds"))
summary(modMCCMutArea)
## 
## Call:
## pgls(formula = log(MutRate_MCC) ~ log(geoArea_km2), data = datacomp, 
##     lambda = "ML")
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.41689 -0.05621 -0.00162  0.05244  0.45942 
## 
## Branch length transformations:
## 
## kappa  [Fix]  : 1.000
## lambda [ ML]  : 0.700
##    lower bound : 0.000, p = < 2.22e-16
##    upper bound : 1.000, p = < 2.22e-16
##    95.0% CI   : (0.625, 0.763)
## delta  [Fix]  : 1.000
## 
## Coefficients:
##                     Estimate  Std. Error  t value Pr(>|t|)    
## (Intercept)      -15.6127697   0.5417953 -28.8167   <2e-16 ***
## log(geoArea_km2)  -0.0017346   0.0086849  -0.1997   0.8417    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.09008 on 1353 degrees of freedom
## Multiple R-squared: 2.948e-05,   Adjusted R-squared: -0.0007096 
## F-statistic: 0.03989 on 1 and 1353 DF,  p-value: 0.8417

Ne seems to be positively correlated with geographic range size which can be a proxy for population size, while mutation rate is not, so matching popgen predictions?


Mean Temperature

MCCNeMutTemp <- sigClades2 %>%
    select(species, Ne_MCC, MutRate_MCC, mean_temp) %>%
    drop_na()

pl1 <- ggplot(MCCNeMutTemp, aes(y = Ne_MCC, x = mean_temp)) + geom_point() + geom_smooth(method = "lm") +
    scale_x_log10() + scale_y_log10()
pl2 <- ggplot(MCCNeMutTemp, aes(y = MutRate_MCC, x = mean_temp)) + geom_point() +
    geom_smooth(method = "lm") + scale_x_log10() + scale_y_log10()

pl1 | pl2

# treeMCC <- TreeSet[['treeMCC']] TreeSubset <- drop.tip(treeMCC,
# as.character(treeMCC$tip.label[which(!treeMCC$tip.label %in%
# MCCNeMutTemp$species)])) datacomp <- comparative.data(data =
# data.frame(MCCNeMutTemp), phy = TreeSubset, names.col = 'species', vcv =
# TRUE, na.omit = TRUE, warn.dropped = TRUE) modMCCNeTemp <- pgls(log(Ne_MCC) ~
# log(mean_temp), data = datacomp, lambda = 'ML') saveRDS(modMCCNeTemp,
# paste0(folder_path,'pgls/extra/modMCCNeTemp.rds')) modMCCMutTemp <-
# pgls(log(MutRate_MCC) ~ log(mean_temp), data = datacomp, lambda = 'ML')
# saveRDS(modMCCMutTemp, paste0(folder_path,'pgls/extra/modMCCMutTemp.rds'))

modMCCNeTemp <- readRDS(paste0(folder_path, "pgls/extra/modMCCNeTemp.rds"))
summary(modMCCNeTemp)
## 
## Call:
## pgls(formula = log(Ne_MCC) ~ log(mean_temp), data = datacomp, 
##     lambda = "ML")
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.47142 -0.08717 -0.00124  0.09293  0.45335 
## 
## Branch length transformations:
## 
## kappa  [Fix]  : 1.000
## lambda [ ML]  : 0.663
##    lower bound : 0.000, p = < 2.22e-16
##    upper bound : 1.000, p = < 2.22e-16
##    95.0% CI   : (0.544, 0.759)
## delta  [Fix]  : 1.000
## 
## Coefficients:
##                Estimate Std. Error t value Pr(>|t|)    
## (Intercept)    10.77757    1.05315 10.2337  < 2e-16 ***
## log(mean_temp)  0.20290    0.11409  1.7784  0.07564 .  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.1423 on 1032 degrees of freedom
## Multiple R-squared: 0.003055,    Adjusted R-squared: 0.002089 
## F-statistic: 3.163 on 1 and 1032 DF,  p-value: 0.07564
modMCCMutTemp <- readRDS(paste0(folder_path, "pgls/extra/modMCCMutTemp.rds"))
summary(modMCCMutTemp)
## 
## Call:
## pgls(formula = log(MutRate_MCC) ~ log(mean_temp), data = datacomp, 
##     lambda = "ML")
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.40711 -0.04760  0.00364  0.05316  0.39243 
## 
## Branch length transformations:
## 
## kappa  [Fix]  : 1.000
## lambda [ ML]  : 0.661
##    lower bound : 0.000, p = < 2.22e-16
##    upper bound : 1.000, p = < 2.22e-16
##    95.0% CI   : (0.567, 0.742)
## delta  [Fix]  : 1.000
## 
## Coefficients:
##                  Estimate Std. Error  t value Pr(>|t|)    
## (Intercept)    -16.301163   0.646457 -25.2162   <2e-16 ***
## log(mean_temp)   0.100722   0.070161   1.4356   0.1514    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.08738 on 1032 degrees of freedom
## Multiple R-squared: 0.001993,    Adjusted R-squared: 0.001026 
## F-statistic: 2.061 on 1 and 1032 DF,  p-value: 0.1514

Litter size

MCCNeMutLitter <- sigClades2 %>%
    select(species, Ne_MCC, MutRate_MCC, litter_or_clutch_size_n) %>%
    drop_na()

pl1 <- ggplot(MCCNeMutLitter, aes(y = Ne_MCC, x = litter_or_clutch_size_n)) + geom_point() +
    geom_smooth(method = "lm") + scale_x_log10() + scale_y_log10()
pl2 <- ggplot(MCCNeMutLitter, aes(y = MutRate_MCC, x = litter_or_clutch_size_n)) +
    geom_point() + geom_smooth(method = "lm") + scale_x_log10() + scale_y_log10()

pl1 | pl2

# treeMCC <- TreeSet[['treeMCC']] TreeSubset <- drop.tip(treeMCC,
# as.character(treeMCC$tip.label[which(!treeMCC$tip.label %in%
# MCCNeMutLitter$species)])) datacomp <- comparative.data(data =
# data.frame(MCCNeMutLitter), phy = TreeSubset, names.col = 'species', vcv =
# TRUE, na.omit = TRUE, warn.dropped = TRUE) modMCCNeLitter <- pgls(log(Ne_MCC)
# ~ log(litter_or_clutch_size_n), data = datacomp, lambda = 'ML')
# saveRDS(modMCCNeLitter, paste0(folder_path,'pgls/extra/modMCCNeLitter.rds'))
# modMCCMutLitter <- pgls(log(MutRate_MCC) ~ log(litter_or_clutch_size_n), data
# = datacomp, lambda = 'ML') saveRDS(modMCCMutLitter,
# paste0(folder_path,'pgls/extra/modMCCMutLitter.rds'))

modMCCNeLitter <- readRDS(paste0(folder_path, "pgls/extra/modMCCNeLitter.rds"))
summary(modMCCNeLitter)
## 
## Call:
## pgls(formula = log(Ne_MCC) ~ log(litter_or_clutch_size_n), data = datacomp, 
##     lambda = "ML")
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.46693 -0.08562  0.00064  0.08423  0.44237 
## 
## Branch length transformations:
## 
## kappa  [Fix]  : 1.000
## lambda [ ML]  : 0.623
##    lower bound : 0.000, p = < 2.22e-16
##    upper bound : 1.000, p = < 2.22e-16
##    95.0% CI   : (0.507, 0.723)
## delta  [Fix]  : 1.000
## 
## Coefficients:
##                              Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                  11.64196    0.78108 14.9050  < 2e-16 ***
## log(litter_or_clutch_size_n)  0.20696    0.11426  1.8113  0.07038 .  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.1389 on 1091 degrees of freedom
## Multiple R-squared: 0.002998,    Adjusted R-squared: 0.002084 
## F-statistic: 3.281 on 1 and 1091 DF,  p-value: 0.07038
modMCCMutLitter <- readRDS(paste0(folder_path, "pgls/extra/modMCCMutLitter.rds"))
summary(modMCCMutLitter)
## 
## Call:
## pgls(formula = log(MutRate_MCC) ~ log(litter_or_clutch_size_n), 
##     data = datacomp, lambda = "ML")
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.29063 -0.05220  0.00013  0.04800  0.38418 
## 
## Branch length transformations:
## 
## kappa  [Fix]  : 1.000
## lambda [ ML]  : 0.581
##    lower bound : 0.000, p = < 2.22e-16
##    upper bound : 1.000, p = < 2.22e-16
##    95.0% CI   : (0.476, 0.676)
## delta  [Fix]  : 1.000
## 
## Coefficients:
##                                Estimate Std. Error  t value  Pr(>|t|)    
## (Intercept)                  -15.262477   0.439782 -34.7046 < 2.2e-16 ***
## log(litter_or_clutch_size_n)  -0.383732   0.067873  -5.6536 2.005e-08 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.08082 on 1091 degrees of freedom
## Multiple R-squared: 0.02846, Adjusted R-squared: 0.02757 
## F-statistic: 31.96 on 1 and 1091 DF,  p-value: 2.005e-08

Generation Length

MCCNeMutGLength <- sigClades2 %>%
    select(species, Ne_MCC, MutRate_MCC, GenerationLength_d) %>%
    drop_na()

pl1 <- ggplot(MCCNeMutGLength, aes(y = Ne_MCC, x = GenerationLength_d)) + geom_point() +
    geom_smooth(method = "lm") + scale_x_log10() + scale_y_log10()
pl2 <- ggplot(MCCNeMutGLength, aes(y = MutRate_MCC, x = GenerationLength_d)) + geom_point() +
    geom_smooth(method = "lm") + scale_x_log10() + scale_y_log10()

pl1 | pl2

# treeMCC <- TreeSet[['treeMCC']] TreeSubset <- drop.tip(treeMCC,
# as.character(treeMCC$tip.label[which(!treeMCC$tip.label %in%
# MCCNeMutGLength$species)])) datacomp <- comparative.data(data =
# data.frame(MCCNeMutGLength), phy = TreeSubset, names.col = 'species', vcv =
# TRUE, na.omit = TRUE, warn.dropped = TRUE) modMCCNeGLength <-
# pgls(log(Ne_MCC) ~ log(GenerationLength_d), data = datacomp, lambda = 'ML')
# saveRDS(modMCCNeGLength,
# paste0(folder_path,'pgls/extra/modMCCNeGLength.rds')) modMCCMutGLength <-
# pgls(log(MutRate_MCC) ~ log(GenerationLength_d), data = datacomp, lambda =
# 'ML') saveRDS(modMCCMutGLength,
# paste0(folder_path,'pgls/extra/modMCCMutGLength.rds'))

modMCCNeGLength <- readRDS(paste0(folder_path, "pgls/extra/modMCCNeGLength.rds"))
summary(modMCCNeGLength)
## 
## Call:
## pgls(formula = log(Ne_MCC) ~ log(GenerationLength_d), data = datacomp, 
##     lambda = "ML")
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.33706 -0.07092  0.00339  0.07605  0.43615 
## 
## Branch length transformations:
## 
## kappa  [Fix]  : 1.000
## lambda [ ML]  : 0.353
##    lower bound : 0.000, p = 6.6613e-16
##    upper bound : 1.000, p = < 2.22e-16
##    95.0% CI   : (0.201, 0.508)
## delta  [Fix]  : 1.000
## 
## Coefficients:
##                          Estimate Std. Error t value  Pr(>|t|)    
## (Intercept)             20.753103   0.795793  26.078 < 2.2e-16 ***
## log(GenerationLength_d) -1.261920   0.092566 -13.633 < 2.2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.1067 on 1376 degrees of freedom
## Multiple R-squared: 0.119,   Adjusted R-squared: 0.1184 
## F-statistic: 185.9 on 1 and 1376 DF,  p-value: < 2.2e-16
modMCCMutGLength <- readRDS(paste0(folder_path, "pgls/extra/modMCCMutGLength.rds"))
summary(modMCCMutGLength)
## 
## Call:
## pgls(formula = log(MutRate_MCC) ~ log(GenerationLength_d), data = datacomp, 
##     lambda = "ML")
## 
## Residuals:
##       Min        1Q    Median        3Q       Max 
## -0.242143 -0.033702  0.001037  0.034028  0.238696 
## 
## Branch length transformations:
## 
## kappa  [Fix]  : 1.000
## lambda [ ML]  : 0.307
##    lower bound : 0.000, p = 2.7756e-14
##    upper bound : 1.000, p = < 2.22e-16
##    95.0% CI   : (0.178, 0.447)
## delta  [Fix]  : 1.000
## 
## Coefficients:
##                           Estimate Std. Error t value  Pr(>|t|)    
## (Intercept)             -22.531352   0.433682 -51.954 < 2.2e-16 ***
## log(GenerationLength_d)   0.969306   0.051373  18.868 < 2.2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.05971 on 1376 degrees of freedom
## Multiple R-squared: 0.2055,  Adjusted R-squared: 0.205 
## F-statistic:   356 on 1 and 1376 DF,  p-value: < 2.2e-16

Ne/MutRate ~ spRate + traits

MCCNeSpRateTraits <- sigClades2 %>%
    select(species, Ne_MCC, SpRate_MCC, BodyMassKg_notInputed, geoArea_km2, mean_temp,
        litter_or_clutch_size_n, GenerationLength_d) %>%
    drop_na()

treeMCC <- TreeSet[["treeMCC"]]

TreeSubset <- drop.tip(treeMCC, as.character(treeMCC$tip.label[which(!treeMCC$tip.label %in%
    MCCNeSpRateTraits$species)]))

datacomp <- comparative.data(data = data.frame(MCCNeSpRateTraits), phy = TreeSubset,
    names.col = "species", vcv = TRUE, na.omit = TRUE, warn.dropped = TRUE)

modMCCNeSpRateTraits <- pgls(log(Ne_MCC) ~ log(SpRate_MCC) + log(BodyMassKg_notInputed) +
    log(geoArea_km2) + log(mean_temp) + log(litter_or_clutch_size_n) + log(GenerationLength_d),
    data = datacomp, lambda = "ML")
saveRDS(modMCCNeSpRateTraits, paste0(folder_path, "pgls/extra/modMCCNeSpRateTraits.rds"))
modMCCNeSpRateTraits <- readRDS(paste0(folder_path, "pgls/extra/modMCCNeSpRateTraits.rds"))
summary(modMCCNeSpRateTraits)
## 
## Call:
## pgls(formula = log(Ne_MCC) ~ log(SpRate_MCC) + log(BodyMassKg_notInputed) + 
##     log(geoArea_km2) + log(mean_temp) + log(litter_or_clutch_size_n) + 
##     log(GenerationLength_d), data = datacomp, lambda = "ML")
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.35798 -0.06726  0.00038  0.06484  0.28728 
## 
## Branch length transformations:
## 
## kappa  [Fix]  : 1.000
## lambda [ ML]  : 0.341
##    lower bound : 0.000, p = 1.4454e-08
##    upper bound : 1.000, p = < 2.22e-16
##    95.0% CI   : (0.164, 0.530)
## delta  [Fix]  : 1.000
## 
## Coefficients:
##                               Estimate Std. Error t value  Pr(>|t|)    
## (Intercept)                  15.665790   1.211749 12.9283 < 2.2e-16 ***
## log(SpRate_MCC)              -0.154028   0.099381 -1.5499  0.121539    
## log(BodyMassKg_notInputed)   -0.140818   0.031433 -4.4800 8.471e-06 ***
## log(geoArea_km2)              0.129270   0.018801  6.8758 1.183e-11 ***
## log(mean_temp)                0.238170   0.107015  2.2256  0.026301 *  
## log(litter_or_clutch_size_n) -0.334276   0.110605 -3.0222  0.002583 ** 
## log(GenerationLength_d)      -0.999144   0.125026 -7.9915 4.219e-15 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.09969 on 862 degrees of freedom
## Multiple R-squared: 0.1956,  Adjusted R-squared:  0.19 
## F-statistic: 34.94 on 6 and 862 DF,  p-value: < 2.2e-16

MCCMutRateSpRateTraits <- sigClades2 %>%
    select(species, MutRate_MCC, SpRate_MCC, Ne_MCC, BodyMassKg_notInputed, geoArea_km2,
        mean_temp, litter_or_clutch_size_n, GenerationLength_d) %>%
    drop_na()

# treeMCC <- TreeSet[['treeMCC']] TreeSubset <- drop.tip(treeMCC,
# as.character(treeMCC$tip.label[which(!treeMCC$tip.label %in%
# MCCMutRateSpRateTraits$species)])) datacomp <- comparative.data(data =
# data.frame(MCCMutRateSpRateTraits), phy = TreeSubset, names.col = 'species',
# vcv = TRUE, na.omit = TRUE, warn.dropped = TRUE) modMCCMutRateSpRateTraits <-
# pgls(log(MutRate_MCC) ~ log(SpRate_MCC) + log(BodyMassKg_notInputed) +
# log(geoArea_km2) + log(mean_temp) + log(litter_or_clutch_size_n) +
# log(GenerationLength_d), data = datacomp, lambda = 'ML')
# saveRDS(modMCCMutRateSpRateTraits,
# paste0(folder_path,'pgls/extra/modMCCMutRateSpRateTraits.rds'))

modMCCMutRateSpRateTraits <- readRDS(paste0(folder_path, "pgls/extra/modMCCMutRateSpRateTraits.rds"))
summary(modMCCMutRateSpRateTraits)
## 
## Call:
## pgls(formula = log(MutRate_MCC) ~ log(SpRate_MCC) + log(BodyMassKg_notInputed) + 
##     log(geoArea_km2) + log(mean_temp) + log(litter_or_clutch_size_n) + 
##     log(GenerationLength_d), data = datacomp, lambda = "ML")
## 
## Residuals:
##       Min        1Q    Median        3Q       Max 
## -0.199101 -0.037127 -0.000453  0.035138  0.215148 
## 
## Branch length transformations:
## 
## kappa  [Fix]  : 1.000
## lambda [ ML]  : 0.244
##    lower bound : 0.000, p = 3.1609e-08
##    upper bound : 1.000, p = < 2.22e-16
##    95.0% CI   : (0.120, 0.398)
## delta  [Fix]  : 1.000
## 
## Coefficients:
##                                 Estimate  Std. Error  t value  Pr(>|t|)    
## (Intercept)                  -22.4318630   0.7058234 -31.7811 < 2.2e-16 ***
## log(SpRate_MCC)               -0.1550365   0.0589336  -2.6307  0.008673 ** 
## log(BodyMassKg_notInputed)     0.0018340   0.0178578   0.1027  0.918227    
## log(geoArea_km2)               0.0034342   0.0115157   0.2982  0.765609    
## log(mean_temp)                 0.0600173   0.0651450   0.9213  0.357158    
## log(litter_or_clutch_size_n)  -0.0939521   0.0649365  -1.4468  0.148308    
## log(GenerationLength_d)        0.8743938   0.0731407  11.9550 < 2.2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.05819 on 862 degrees of freedom
## Multiple R-squared: 0.2378,  Adjusted R-squared: 0.2325 
## F-statistic: 44.82 on 6 and 862 DF,  p-value: < 2.2e-16

Speciation rate is not correlated with Ne when including traits but it’s the only variable that correlates with mutation rate. Perhaps the analyses with traits would be more meaningful to explain the correlation between genetic diversity and speciation rates when decomposing pi into Ne and mutation rate (theta (pi) = 4Neu for an idealized population and diploid and nuclear data), but I am not sure if we wouldn’t be overinterpreting these results. Why would Ne and spRate be correlated but when adding traits to the model spRate is not significant anymore, indirect effects?


# treeMCC <- TreeSet[['treeMCC']] TreeSubset <- drop.tip(treeMCC,
# as.character(treeMCC$tip.label[which(!treeMCC$tip.label %in%
# MCCMutRateSpRateTraits$species)])) datacomp <- comparative.data(data =
# data.frame(MCCMutRateSpRateTraits), phy = TreeSubset, names.col = 'species',
# vcv = TRUE, na.omit = TRUE, warn.dropped = TRUE) modMCCNeSpRate <-
# pgls(log(Ne_MCC) ~ log(SpRate_MCC), data = datacomp, lambda = 'ML')
# saveRDS(modMCCNeSpRate, paste0(folder_path, 'pgls/extra/modMCCNeSpRate.rds'))
# modMCCNeSpRatekg <- pgls(log(Ne_MCC) ~ log(SpRate_MCC) +
# log(BodyMassKg_notInputed), data = datacomp, lambda = 'ML')
# saveRDS(modMCCNeSpRatekg, paste0(folder_path,
# 'pgls/extra/modMCCNeSpRatekg.rds')) modMCCNeSpRateTemp0kg <- pgls(log(Ne_MCC)
# ~ log(SpRate_MCC) + log(BodyMassKg_notInputed) + log(mean_temp), data =
# datacomp, lambda = 'ML') saveRDS(modMCCNeSpRateTemp0kg, paste0(folder_path,
# 'pgls/extra/modMCCNeSpRateTemp0kg.rds')) modMCCNeSpRateArea <-
# pgls(log(Ne_MCC) ~ log(SpRate_MCC) + log(BodyMassKg_notInputed) +
# log(geoArea_km2), data = datacomp, lambda = 'ML') saveRDS(modMCCNeSpRateArea,
# paste0(folder_path,'pgls/extra/modMCCNeSpRateArea.rds'))
# modMCCNeSpRateTemp0Area <- pgls(log(Ne_MCC) ~ log(SpRate_MCC) +
# log(geoArea_km2) + log(mean_temp), data = datacomp, lambda = 'ML')
# saveRDS(modMCCNeSpRateTemp0Area,
# paste0(folder_path,'pgls/extra/modMCCNeSpRateTemp0Area.rds'))
# modMCCNeSpRateTemp0 <- pgls(log(Ne_MCC) ~ log(SpRate_MCC) + log(mean_temp),
# data = datacomp, lambda = 'ML') saveRDS(modMCCNeSpRateTemp0,
# paste0(folder_path, 'pgls/extra/modMCCNeSpRateTemp0.rds')) modMCCNeSpRateTemp
# <- pgls(log(Ne_MCC) ~ log(SpRate_MCC) + log(BodyMassKg_notInputed) +
# log(geoArea_km2) + log(mean_temp), data = datacomp, lambda = 'ML')
# saveRDS(modMCCNeSpRateTemp, paste0(folder_path,
# 'pgls/extra/modMCCNeSpRateTemp.rds')) modMCCNeSpRateLitter <-
# pgls(log(Ne_MCC) ~ log(SpRate_MCC) + log(BodyMassKg_notInputed) +
# log(geoArea_km2) + log(mean_temp) + log(litter_or_clutch_size_n), data =
# datacomp, lambda = 'ML') saveRDS(modMCCNeSpRateLitter, paste0(folder_path,
# 'pgls/extra/modMCCNeSpRateLitter.rds')) modMCCNeSpRateLitter0 <-
# pgls(log(Ne_MCC) ~ log(SpRate_MCC) + log(litter_or_clutch_size_n), data =
# datacomp, lambda = 'ML') saveRDS(modMCCNeSpRateLitter0, paste0(folder_path,
# 'pgls/extra/modMCCNeSpRateLitter0.rds')) modMCCNeSpRateGLength <-
# pgls(log(Ne_MCC) ~ log(SpRate_MCC) + log(BodyMassKg_notInputed) +
# log(geoArea_km2) + log(mean_temp) + log(litter_or_clutch_size_n) +
# log(GenerationLength_d), data = datacomp, lambda = 'ML')
# saveRDS(modMCCNeSpRateGLength, paste0(folder_path,
# 'pgls/extra/modMCCNeSpRateGLength.rds')) modMCCNeSpRateGLength0 <-
# pgls(log(Ne_MCC) ~ log(SpRate_MCC) + log(GenerationLength_d), data =
# datacomp, lambda = 'ML') saveRDS(modMCCNeSpRateGLength0, paste0(folder_path,
# 'pgls/extra/modMCCNeSpRateGLength0.rds'))

modMCCNeSpRate <- readRDS(paste0(folder_path, "pgls/extra/modMCCNeSpRate.rds"))
summary(modMCCNeSpRate)
## 
## Call:
## pgls(formula = log(Ne_MCC) ~ log(SpRate_MCC), data = datacomp, 
##     lambda = "ML")
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.54477 -0.09021  0.00215  0.08878  0.39689 
## 
## Branch length transformations:
## 
## kappa  [Fix]  : 1.000
## lambda [ ML]  : 0.608
##    lower bound : 0.000, p = < 2.22e-16
##    upper bound : 1.000, p = < 2.22e-16
##    95.0% CI   : (0.473, 0.724)
## delta  [Fix]  : 1.000
## 
## Coefficients:
##                 Estimate Std. Error t value Pr(>|t|)    
## (Intercept)     11.47250    0.76894 14.9198   <2e-16 ***
## log(SpRate_MCC) -0.23642    0.12114 -1.9517   0.0513 .  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.134 on 867 degrees of freedom
## Multiple R-squared: 0.004374,    Adjusted R-squared: 0.003226 
## F-statistic: 3.809 on 1 and 867 DF,  p-value: 0.0513
modMCCNeSpRatekg <- readRDS(paste0(folder_path, "pgls/extra/modMCCNeSpRatekg.rds"))
summary(modMCCNeSpRatekg)
## 
## Call:
## pgls(formula = log(Ne_MCC) ~ log(SpRate_MCC) + log(BodyMassKg_notInputed), 
##     data = datacomp, lambda = "ML")
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.53942 -0.07676  0.00041  0.07461  0.38445 
## 
## Branch length transformations:
## 
## kappa  [Fix]  : 1.000
## lambda [ ML]  : 0.511
##    lower bound : 0.000, p = < 2.22e-16
##    upper bound : 1.000, p = < 2.22e-16
##    95.0% CI   : (0.361, 0.651)
## delta  [Fix]  : 1.000
## 
## Coefficients:
##                             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                11.173545   0.631838 17.6842  < 2e-16 ***
## log(SpRate_MCC)            -0.219117   0.111453 -1.9660  0.04962 *  
## log(BodyMassKg_notInputed) -0.267031   0.031427 -8.4968  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.1184 on 866 degrees of freedom
## Multiple R-squared: 0.08206, Adjusted R-squared: 0.07994 
## F-statistic: 38.71 on 2 and 866 DF,  p-value: < 2.2e-16
modMCCNeSpRateTemp0kg <- readRDS(paste0(folder_path, "pgls/extra/modMCCNeSpRateTemp0kg.rds"))
summary(modMCCNeSpRateTemp0kg)
## 
## Call:
## pgls(formula = log(Ne_MCC) ~ log(SpRate_MCC) + log(BodyMassKg_notInputed) + 
##     log(mean_temp), data = datacomp, lambda = "ML")
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.54008 -0.08460 -0.01031  0.06740  0.38758 
## 
## Branch length transformations:
## 
## kappa  [Fix]  : 1.000
## lambda [ ML]  : 0.511
##    lower bound : 0.000, p = < 2.22e-16
##    upper bound : 1.000, p = < 2.22e-16
##    95.0% CI   : (0.365, 0.649)
## delta  [Fix]  : 1.000
## 
## Coefficients:
##                             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                10.133349   0.897767 11.2873  < 2e-16 ***
## log(SpRate_MCC)            -0.204774   0.111708 -1.8331  0.06713 .  
## log(BodyMassKg_notInputed) -0.264978   0.031429 -8.4309  < 2e-16 ***
## log(mean_temp)              0.182945   0.112211  1.6304  0.10339    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.1183 on 865 degrees of freedom
## Multiple R-squared: 0.08485, Adjusted R-squared: 0.08168 
## F-statistic: 26.73 on 3 and 865 DF,  p-value: < 2.2e-16
modMCCNeSpRateArea <- readRDS(paste0(folder_path, "pgls/extra/modMCCNeSpRateArea.rds"))
summary(modMCCNeSpRateArea)
## 
## Call:
## pgls(formula = log(Ne_MCC) ~ log(SpRate_MCC) + log(BodyMassKg_notInputed) + 
##     log(geoArea_km2), data = datacomp, lambda = "ML")
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.38499 -0.07232  0.00743  0.08131  0.36520 
## 
## Branch length transformations:
## 
## kappa  [Fix]  : 1.000
## lambda [ ML]  : 0.543
##    lower bound : 0.000, p = < 2.22e-16
##    upper bound : 1.000, p = < 2.22e-16
##    95.0% CI   : (0.398, 0.676)
## delta  [Fix]  : 1.000
## 
## Coefficients:
##                             Estimate Std. Error t value  Pr(>|t|)    
## (Intercept)                 9.610188   0.695754 13.8126 < 2.2e-16 ***
## log(SpRate_MCC)            -0.147557   0.111082 -1.3284    0.1844    
## log(BodyMassKg_notInputed) -0.276661   0.031373 -8.8184 < 2.2e-16 ***
## log(geoArea_km2)            0.123359   0.019238  6.4121 2.359e-10 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.1189 on 865 degrees of freedom
## Multiple R-squared: 0.122,   Adjusted R-squared: 0.119 
## F-statistic: 40.08 on 3 and 865 DF,  p-value: < 2.2e-16
modMCCNeSpRateTemp0Area <- readRDS(paste0(folder_path, "pgls/extra/modMCCNeSpRateTemp0Area.rds"))
summary(modMCCNeSpRateTemp0Area)
## 
## Call:
## pgls(formula = log(Ne_MCC) ~ log(SpRate_MCC) + log(geoArea_km2) + 
##     log(mean_temp), data = datacomp, lambda = "ML")
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.39931 -0.08740 -0.00013  0.09494  0.46578 
## 
## Branch length transformations:
## 
## kappa  [Fix]  : 1.000
## lambda [ ML]  : 0.637
##    lower bound : 0.000, p = < 2.22e-16
##    upper bound : 1.000, p = < 2.22e-16
##    95.0% CI   : (0.510, 0.745)
## delta  [Fix]  : 1.000
## 
## Coefficients:
##                   Estimate Std. Error t value  Pr(>|t|)    
## (Intercept)       8.383318   1.068123  7.8486 1.243e-14 ***
## log(SpRate_MCC)  -0.145596   0.121099 -1.2023   0.22958    
## log(geoArea_km2)  0.122371   0.019983  6.1238 1.385e-09 ***
## log(mean_temp)    0.271434   0.115039  2.3595   0.01852 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.135 on 865 degrees of freedom
## Multiple R-squared: 0.04956, Adjusted R-squared: 0.04626 
## F-statistic: 15.03 on 3 and 865 DF,  p-value: 1.516e-09
modMCCNeSpRateTemp <- readRDS(paste0(folder_path, "pgls/extra/modMCCNeSpRateTemp.rds"))
summary(modMCCNeSpRateTemp)
## 
## Call:
## pgls(formula = log(Ne_MCC) ~ log(SpRate_MCC) + log(BodyMassKg_notInputed) + 
##     log(geoArea_km2) + log(mean_temp), data = datacomp, lambda = "ML")
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.38784 -0.07579 -0.00494  0.07775  0.35575 
## 
## Branch length transformations:
## 
## kappa  [Fix]  : 1.000
## lambda [ ML]  : 0.543
##    lower bound : 0.000, p = < 2.22e-16
##    upper bound : 1.000, p = < 2.22e-16
##    95.0% CI   : (0.400, 0.674)
## delta  [Fix]  : 1.000
## 
## Coefficients:
##                             Estimate Std. Error t value  Pr(>|t|)    
## (Intercept)                 8.289628   0.944975  8.7723 < 2.2e-16 ***
## log(SpRate_MCC)            -0.128794   0.111250 -1.1577   0.24731    
## log(BodyMassKg_notInputed) -0.274022   0.031342 -8.7429 < 2.2e-16 ***
## log(geoArea_km2)            0.125758   0.019238  6.5371 1.072e-10 ***
## log(mean_temp)              0.226806   0.110057  2.0608   0.03962 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.1186 on 864 degrees of freedom
## Multiple R-squared: 0.1263,  Adjusted R-squared: 0.1223 
## F-statistic: 31.23 on 4 and 864 DF,  p-value: < 2.2e-16
modMCCNeSpRateTemp0 <- readRDS(paste0(folder_path, "pgls/extra/modMCCNeSpRateTemp0.rds"))
summary(modMCCNeSpRateTemp0)
## 
## Call:
## pgls(formula = log(Ne_MCC) ~ log(SpRate_MCC) + log(mean_temp), 
##     data = datacomp, lambda = "ML")
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.54672 -0.08820 -0.00425  0.08745  0.38011 
## 
## Branch length transformations:
## 
## kappa  [Fix]  : 1.000
## lambda [ ML]  : 0.609
##    lower bound : 0.000, p = < 2.22e-16
##    upper bound : 1.000, p = < 2.22e-16
##    95.0% CI   : (0.477, 0.724)
## delta  [Fix]  : 1.000
## 
## Coefficients:
##                 Estimate Std. Error t value Pr(>|t|)    
## (Intercept)     10.18085    1.01891  9.9919  < 2e-16 ***
## log(SpRate_MCC) -0.21903    0.12135 -1.8049  0.07144 .  
## log(mean_temp)   0.22651    0.11707  1.9348  0.05334 .  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.1339 on 866 degrees of freedom
## Multiple R-squared: 0.008644,    Adjusted R-squared: 0.006355 
## F-statistic: 3.776 on 2 and 866 DF,  p-value: 0.0233
modMCCNeSpRateLitter <- readRDS(paste0(folder_path, "pgls/extra/modMCCNeSpRateLitter.rds"))
summary(modMCCNeSpRateLitter)
## 
## Call:
## pgls(formula = log(Ne_MCC) ~ log(SpRate_MCC) + log(BodyMassKg_notInputed) + 
##     log(geoArea_km2) + log(mean_temp) + log(litter_or_clutch_size_n), 
##     data = datacomp, lambda = "ML")
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.45234 -0.08206 -0.00008  0.07695  0.36439 
## 
## Branch length transformations:
## 
## kappa  [Fix]  : 1.000
## lambda [ ML]  : 0.565
##    lower bound : 0.000, p = < 2.22e-16
##    upper bound : 1.000, p = < 2.22e-16
##    95.0% CI   : (0.424, 0.690)
## delta  [Fix]  : 1.000
## 
## Coefficients:
##                               Estimate Std. Error t value  Pr(>|t|)    
## (Intercept)                   8.602918   0.981412  8.7659 < 2.2e-16 ***
## log(SpRate_MCC)              -0.121159   0.112305 -1.0788   0.28096    
## log(BodyMassKg_notInputed)   -0.288956   0.032791 -8.8121 < 2.2e-16 ***
## log(geoArea_km2)              0.130584   0.019422  6.7236 3.222e-11 ***
## log(mean_temp)                0.195982   0.111846  1.7522   0.08009 .  
## log(litter_or_clutch_size_n) -0.193329   0.121942 -1.5854   0.11324    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.1208 on 863 degrees of freedom
## Multiple R-squared: 0.128,   Adjusted R-squared: 0.123 
## F-statistic: 25.35 on 5 and 863 DF,  p-value: < 2.2e-16
modMCCNeSpRateLitter0 <- readRDS(paste0(folder_path, "pgls/extra/modMCCNeSpRateLitter0.rds"))
summary(modMCCNeSpRateLitter0)
## 
## Call:
## pgls(formula = log(Ne_MCC) ~ log(SpRate_MCC) + log(litter_or_clutch_size_n), 
##     data = datacomp, lambda = "ML")
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.41472 -0.08482  0.00675  0.08964  0.39498 
## 
## Branch length transformations:
## 
## kappa  [Fix]  : 1.000
## lambda [ ML]  : 0.594
##    lower bound : 0.000, p = < 2.22e-16
##    upper bound : 1.000, p = < 2.22e-16
##    95.0% CI   : (0.452, 0.717)
## delta  [Fix]  : 1.000
## 
## Coefficients:
##                              Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                  11.33891    0.76347 14.8519  < 2e-16 ***
## log(SpRate_MCC)              -0.24158    0.12036 -2.0072  0.04504 *  
## log(litter_or_clutch_size_n)  0.11880    0.12466  0.9530  0.34085    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.1322 on 866 degrees of freedom
## Multiple R-squared: 0.005572,    Adjusted R-squared: 0.003275 
## F-statistic: 2.426 on 2 and 866 DF,  p-value: 0.08899
modMCCNeSpRateGLength <- readRDS(paste0(folder_path, "pgls/extra/modMCCNeSpRateGLength.rds"))
summary(modMCCNeSpRateGLength)
## 
## Call:
## pgls(formula = log(Ne_MCC) ~ log(SpRate_MCC) + log(BodyMassKg_notInputed) + 
##     log(geoArea_km2) + log(mean_temp) + log(litter_or_clutch_size_n) + 
##     log(GenerationLength_d), data = datacomp, lambda = "ML")
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.35798 -0.06726  0.00038  0.06484  0.28728 
## 
## Branch length transformations:
## 
## kappa  [Fix]  : 1.000
## lambda [ ML]  : 0.341
##    lower bound : 0.000, p = 1.4454e-08
##    upper bound : 1.000, p = < 2.22e-16
##    95.0% CI   : (0.164, 0.530)
## delta  [Fix]  : 1.000
## 
## Coefficients:
##                               Estimate Std. Error t value  Pr(>|t|)    
## (Intercept)                  15.665790   1.211749 12.9283 < 2.2e-16 ***
## log(SpRate_MCC)              -0.154028   0.099381 -1.5499  0.121539    
## log(BodyMassKg_notInputed)   -0.140818   0.031433 -4.4800 8.471e-06 ***
## log(geoArea_km2)              0.129270   0.018801  6.8758 1.183e-11 ***
## log(mean_temp)                0.238170   0.107015  2.2256  0.026301 *  
## log(litter_or_clutch_size_n) -0.334276   0.110605 -3.0222  0.002583 ** 
## log(GenerationLength_d)      -0.999144   0.125026 -7.9915 4.219e-15 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.09969 on 862 degrees of freedom
## Multiple R-squared: 0.1956,  Adjusted R-squared:  0.19 
## F-statistic: 34.94 on 6 and 862 DF,  p-value: < 2.2e-16
modMCCNeSpRateGLength0 <- readRDS(paste0(folder_path, "pgls/extra/modMCCNeSpRateGLength0.rds"))
summary(modMCCNeSpRateGLength0)
## 
## Call:
## pgls(formula = log(Ne_MCC) ~ log(SpRate_MCC) + log(GenerationLength_d), 
##     data = datacomp, lambda = "ML")
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.35204 -0.06375  0.00524  0.07292  0.28232 
## 
## Branch length transformations:
## 
## kappa  [Fix]  : 1.000
## lambda [ ML]  : 0.248
##    lower bound : 0.000, p = 1.6402e-05
##    upper bound : 1.000, p = < 2.22e-16
##    95.0% CI   : (0.088, 0.455)
## delta  [Fix]  : 1.000
## 
## Coefficients:
##                          Estimate Std. Error  t value  Pr(>|t|)    
## (Intercept)             19.293882   0.793870  24.3036 < 2.2e-16 ***
## log(SpRate_MCC)         -0.288733   0.098269  -2.9382  0.003389 ** 
## log(GenerationLength_d) -1.126660   0.097269 -11.5830 < 2.2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.09844 on 866 degrees of freedom
## Multiple R-squared: 0.1428,  Adjusted R-squared: 0.1409 
## F-statistic: 72.15 on 2 and 866 DF,  p-value: < 2.2e-16

Perhaps due to some interaction between geographic range size and temperature? Still not sure how to interpret this…


Maybe should do proper phylogenetic analyses (PGLS and BMLM for posterior trees) with mutation rate/Ne and traits? Using MCC tree, Body size, generation length, range area and mean temperature seem to be correlated with Ne and not correlated with mutation rate. Higher Ne for species with lower body sizes, larger range sizes, that occur in warmer climates and short longevity.

Range size being correlated with Ne and not with mutation rate gives a bit more confidence to these values of Ne (better would be to correlate with census size). But could had made sense mean temperature to be correlated with mutation rate.


Why is mutation rate not positively correlated with speciation rate?

Expectation is that the higher the mutation rate the fastest speciation splits would occur.

Compare mutation rate, speciation rate, Ne and time (branch lenghts at the tips) of clades that have a significant negative pi ~ spRate vs clades that don’t, compare values from both mean rates and MCC rates.

correlComp2 <- sigClades2 %>%
    pivot_longer(cols = c("MutRate_MCC", "SpRate_MCC", "Ne_MCC", "MutRate_mean",
        "SpRate_mean", "Ne_mean", "time")) %>%
    ggboxplot(., y = "value", x = "signif", add = "jitter", add.params = list(size = 0.5,
        alpha = 0.2, color = "gray70")) + facet_wrap(~fct_inorder(name), scales = "free_y",
    ncol = 3) + scale_y_log10() + theme_bw() + stat_compare_means(na.rm = T) + labs(x = "Comparison between clades with correlation and without")
correlComp2

filter(sigClades2, !species %in% exclude) %>%
    pivot_longer(cols = c("MutRate_MCC", "SpRate_MCC", "Ne_MCC", "MutRate_mean",
        "SpRate_mean", "Ne_mean", "time")) %>%
    ggboxplot(., y = "value", x = "signif", add = "jitter", add.params = list(size = 0.5,
        alpha = 0.2, color = "gray70")) + facet_wrap(~fct_inorder(name), scales = "free_y",
    ncol = 3) + scale_y_log10() + theme_bw() + stat_compare_means(na.rm = T) + labs(title = "Excluding mutation rate outliers",
    x = "Comparison between clades with correlation and without")

Why are MCC rates different from mean rates when comparing significant clades vs not (when comparing species dataset with pi)? Removing mutation rate outliers doesn’t seem to do much…


filter(sigClades2, !species %in% exclude) %>%
    pivot_longer(cols = c("EstPi", "SpRate_MCC", "MutRate_MCC", "Ne_MCC")) %>%
    ggplot(aes(x = time, y = value)) + geom_point(alpha = 0.3, size = 0.7) + geom_smooth(method = "lm",
    linetype = 2) + geom_smooth(aes(color = mutclade), method = "lm", se = F) + scale_x_log10() +
    scale_y_log10() + theme_bw() + facet_wrap(~fct_inorder(name), scales = "free_y",
    ncol = 2, strip.position = "left") + theme(legend.position = "bottom", strip.text = element_text(size = 14),
    strip.placement = "outside", strip.background.y = element_blank(), panel.grid.major = element_blank(),
    panel.grid.minor = element_blank()) + scale_colour_manual(values = iwanthue(16)) +
    labs(x = "Species age (time, Mya)", y = "")

Tip speciation rate is highly correlated with tip branch lengths, so the shorter the branches the higher the speciation rate but the shorter the branches the fewer substitutions to get this crude mutation rate.


Why MCC mutation rate vs Mean mutation rate is a bit different?

Checking that MCC vs mean values are highly correlated…

MutRate_meanMCC <- filter(sigClades2, !species %in% exclude) %>%
    ggplot(aes(x = MutRate_MCC, y = MutRate_mean, color = signif)) + geom_point() +
    geom_smooth(method = "lm") + scale_y_log10() + scale_x_log10() + theme_bw()

SpRate_meanMCC <- filter(sigClades2, !species %in% exclude) %>%
    ggplot(aes(x = SpRate_MCC, y = SpRate_mean, color = signif)) + geom_point() +
    geom_smooth(method = "lm") + scale_y_log10() + scale_x_log10() + theme_bw()

Ne_meanMCC <- filter(sigClades2, !species %in% exclude) %>%
    ggplot(aes(x = Ne_MCC, y = Ne_mean, color = signif)) + geom_point() + geom_smooth(method = "lm") +
    scale_y_log10() + scale_x_log10() + theme_bw()
ggarrange(MutRate_meanMCC, SpRate_meanMCC, Ne_meanMCC, ncol = 3, common.legend = T)

Why values from MCC analysis give different pattern from using mean values across 100 posterior trees?


spmutClade_mean <- ggplot(filter(sigClades2, !species %in% exclude), aes(x = SpRate_mean,
    y = MutRate_mean, color = mutclade)) + geom_point(size = 0.7) + geom_smooth(method = "lm",
    se = F) + scale_y_log10() + scale_x_log10() + theme_bw() + scale_colour_manual(values = iwanthue(16))

spmutClade_MCC <- ggplot(filter(sigClades2, !species %in% exclude), aes(x = SpRate_MCC,
    y = MutRate_MCC, color = mutclade)) + geom_point(size = 0.7) + geom_smooth(method = "lm",
    se = F) + scale_y_log10() + scale_x_log10() + theme_bw() + scale_colour_manual(values = iwanthue(16))

ggarrange(spmutClade_mean, spmutClade_MCC, ncol = 2, common.legend = T, legend = "bottom")

spmut <- gendivSpRateMutRate %>%
    filter(set %in% "treeMCC") %>%
    select(species, tipRate, mutRate, mutclade, expNsub, time)

ggplot(spmut, aes(y = tipRate, x = mutRate, color = mutclade)) + geom_point() + geom_smooth(method = "lm",
    se = F) + scale_y_log10() + scale_x_log10() + scale_colour_manual(values = iwanthue(25))

ggplot(spmut, aes(y = tipRate, x = expNsub, color = mutclade)) + geom_point() + geom_smooth(method = "lm",
    se = F) + scale_y_log10() + scale_x_log10() + scale_colour_manual(values = iwanthue(25))

ggplot(spmut, aes(y = tipRate, x = mutRate, color = mutclade)) + geom_point() + geom_smooth(method = "lm",
    se = F) + scale_y_log10() + scale_x_log10() + scale_colour_manual(values = iwanthue(25))
ggplot(spmut, aes(y = tipRate, x = expNsub, color = mutclade)) + geom_point() + geom_smooth(method = "lm",
    se = F) + scale_y_log10() + scale_x_log10() + scale_colour_manual(values = iwanthue(25))
ggplot(spmut, aes(y = tipRate, x = time, color = mutclade)) + geom_point() + geom_smooth(method = "lm",
    se = F) + scale_y_log10() + scale_x_log10() + scale_colour_manual(values = iwanthue(25))
ggplot(spmut, aes(y = expNsub, x = time, color = mutclade)) + geom_point() + geom_smooth(method = "lm",
    se = F) + scale_y_log10() + scale_x_log10() + scale_colour_manual(values = iwanthue(25))
ggplot(spmut, aes(y = mutRate, x = time, color = mutclade)) + geom_point() + geom_smooth(method = "lm",
    se = F) + scale_y_log10() + scale_x_log10() + scale_colour_manual(values = iwanthue(25))

Dispersal tests

gendivSpRateTrait <- gendivSpRate %>%
    left_join(., traitData, by = "species")

mGendivSpRateTrait <- spRate %>%
    filter(!set %in% "treeMCC") %>%
    group_by(species) %>%
    dplyr::summarise(rate_mean = mean(tipRate, na.rm = TRUE), rate_sd = sd(tipRate,
        na.rm = TRUE), rate_se = rate_sd/sqrt(n()), rate_lower.ci = CI(tipRate, ci = 0.95)[1],
        rate_upper.ci = CI(tipRate, ci = 0.95)[3], clades = unique(clades)) %>%
    arrange(clades) %>%
    mutate(ord = seq(1, length(n)), species = forcats::fct_reorder(species, ord),
        clades = forcats::fct_relevel(case_when(!clades %in% unique(PGLSclade$clade) ~
            "Other", TRUE ~ clades), "Other", after = 14)) %>%
    inner_join(., select(gen.div, species, EstPi, EstTheta, Nind), by = "species") %>%
    inner_join(., traitData, by = "species")

qR = ggplot(mGendivSpRateTrait, aes(y = rate_mean, x = DispDist_notInputed)) + geom_point() +
    geom_smooth(method = "lm") + scale_x_log10() + scale_y_log10() + theme_bw() +
    geom_smooth(data = filter(gendivSpRateTrait, set %in% "treeMCC"), aes(y = tipRate,
        x = DispDist_notInputed), method = "lm", color = "red") + labs(caption = "Blue - regression line with mean rates; Red = regression line with MCC tree")

qp = ggplot(mGendivSpRateTrait, aes(y = EstPi, x = DispDist_notInputed)) + geom_point() +
    geom_smooth(method = "lm") + scale_x_log10() + scale_y_log10() + theme_bw()

ggarrange(qR, qp, ncol = 1, heights = c(1.1, 1))


OLS of speciation rates and genetic diversity predicted by dispersal distance

With mean rates vs MCC rates and logged dispersal vs non-logged

summary(lm(log(rate_mean) ~ log(DispDist_notInputed), data = mGendivSpRateTrait))
## 
## Call:
## lm(formula = log(rate_mean) ~ log(DispDist_notInputed), data = mGendivSpRateTrait)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -2.36605 -0.31725  0.08203  0.36261  1.32706 
## 
## Coefficients:
##                           Estimate Std. Error t value Pr(>|t|)    
## (Intercept)              -1.268350   0.013189 -96.167  < 2e-16 ***
## log(DispDist_notInputed)  0.023905   0.006474   3.693 0.000229 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.5266 on 1598 degrees of freedom
##   (81 observations deleted due to missingness)
## Multiple R-squared:  0.00846,    Adjusted R-squared:  0.00784 
## F-statistic: 13.63 on 1 and 1598 DF,  p-value: 0.0002295
summary(lm(log(rate_mean) ~ DispDist_notInputed, data = mGendivSpRateTrait))
## 
## Call:
## lm(formula = log(rate_mean) ~ DispDist_notInputed, data = mGendivSpRateTrait)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -2.34137 -0.32305  0.08152  0.37785  1.44098 
## 
## Coefficients:
##                       Estimate Std. Error t value Pr(>|t|)    
## (Intercept)         -1.266e+00  1.328e-02  -95.36   <2e-16 ***
## DispDist_notInputed  2.050e-05  2.562e-05    0.80    0.424    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.5288 on 1598 degrees of freedom
##   (81 observations deleted due to missingness)
## Multiple R-squared:  0.0004005,  Adjusted R-squared:  -0.0002251 
## F-statistic: 0.6402 on 1 and 1598 DF,  p-value: 0.4238
summary(lm(log(tipRate) ~ log(DispDist_notInputed), data = filter(gendivSpRateTrait,
    set %in% "treeMCC")))
## 
## Call:
## lm(formula = log(tipRate) ~ log(DispDist_notInputed), data = filter(gendivSpRateTrait, 
##     set %in% "treeMCC"))
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -1.89374 -0.29633 -0.00365  0.30936  1.57642 
## 
## Coefficients:
##                           Estimate Std. Error  t value Pr(>|t|)    
## (Intercept)              -1.469047   0.012793 -114.830   <2e-16 ***
## log(DispDist_notInputed)  0.005928   0.006280    0.944    0.345    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.5108 on 1598 degrees of freedom
##   (2464 observations deleted due to missingness)
## Multiple R-squared:  0.0005573,  Adjusted R-squared:  -6.818e-05 
## F-statistic: 0.891 on 1 and 1598 DF,  p-value: 0.3454
summary(lm(log(EstPi) ~ log(DispDist_notInputed), data = mGendivSpRateTrait))
## 
## Call:
## lm(formula = log(EstPi) ~ log(DispDist_notInputed), data = mGendivSpRateTrait)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -4.8897 -0.6364  0.1482  0.7631  2.1105 
## 
## Coefficients:
##                          Estimate Std. Error  t value Pr(>|t|)    
## (Intercept)              -4.06843    0.02526 -161.071  < 2e-16 ***
## log(DispDist_notInputed) -0.10215    0.01240   -8.239 3.59e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 1.009 on 1598 degrees of freedom
##   (81 observations deleted due to missingness)
## Multiple R-squared:  0.04074,    Adjusted R-squared:  0.04014 
## F-statistic: 67.87 on 1 and 1598 DF,  p-value: 3.587e-16
summary(lm(log(EstPi) ~ DispDist_notInputed, data = mGendivSpRateTrait))
## 
## Call:
## lm(formula = log(EstPi) ~ DispDist_notInputed, data = mGendivSpRateTrait)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -4.4732 -0.6824  0.1525  0.7757  2.3263 
## 
## Coefficients:
##                       Estimate Std. Error t value Pr(>|t|)    
## (Intercept)         -4.069e+00  2.568e-02 -158.46  < 2e-16 ***
## DispDist_notInputed -2.397e-04  4.953e-05   -4.84 1.42e-06 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 1.022 on 1598 degrees of freedom
##   (81 observations deleted due to missingness)
## Multiple R-squared:  0.01445,    Adjusted R-squared:  0.01383 
## F-statistic: 23.43 on 1 and 1598 DF,  p-value: 1.424e-06

PGLS of speciation rates and genetic diversity predicted by dispersal distance using MCC rates with logged dispersal

# load(paste0(folder_path,'speciation_rate/output/upham_4064sp_FR_MCCposterior100.rdata'))
# DataSubsetMCC <- gendivSpRateTrait %>% filter(treeN %in% 'treeMCC') %>%
# select(species, tipRate, EstPi, DispDist_notInputed) %>% drop_na() treeMCC <-
# TreeSet[['treeMCC']] TreeSubset <- drop.tip(treeMCC,
# as.character(treeMCC$tip.label[which(!treeMCC$tip.label %in%
# DataSubsetMCC$species)])) datacomp <- comparative.data(data = DataSubsetMCC,
# phy = TreeSubset, names.col = 'species', vcv = TRUE, na.omit = TRUE,
# warn.dropped = TRUE) modpiDisp <- pgls(log(EstPi) ~ log(DispDist_notInputed),
# data = datacomp, lambda = 'ML') saveRDS(modpiDisp,
# paste0(folder_path,'pgls/extra/pglspiDisp.rds'))
modpiDisp <- readRDS(paste0(folder_path, "pgls/extra/pglspiDisp.rds"))
summary(modpiDisp)
## 
## Call:
## pgls(formula = log(EstPi) ~ log(DispDist_notInputed), data = datacomp, 
##     lambda = "ML")
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.35708 -0.06904 -0.00077  0.07306  0.34367 
## 
## Branch length transformations:
## 
## kappa  [Fix]  : 1.000
## lambda [ ML]  : 0.614
##    lower bound : 0.000, p = < 2.22e-16
##    upper bound : 1.000, p = < 2.22e-16
##    95.0% CI   : (0.496, 0.711)
## delta  [Fix]  : 1.000
## 
## Coefficients:
##                           Estimate Std. Error t value  Pr(>|t|)    
## (Intercept)              -3.966737   0.637180 -6.2255 6.123e-10 ***
## log(DispDist_notInputed)  0.014738   0.026064  0.5655    0.5718    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.1044 on 1598 degrees of freedom
## Multiple R-squared: 0.0002,  Adjusted R-squared: -0.0004256 
## F-statistic: 0.3197 on 1 and 1598 DF,  p-value: 0.5718
par(mfrow = c(2, 2))
plot(modpiDisp)

# modspRateDisp <- pgls(log(tipRate) ~ log(DispDist_notInputed), data =
# datacomp, lambda = 'ML') saveRDS(modspRateDisp,
# paste0(folder_path,'pgls/extra/pglsspRateDisp.rds'))
modspRateDisp <- readRDS(paste0(folder_path, "pgls/extra/pglsspRateDisp.rds"))
summary(modspRateDisp)
## 
## Call:
## pgls(formula = log(tipRate) ~ log(DispDist_notInputed), data = datacomp, 
##     lambda = "ML")
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.32894 -0.04146  0.00016  0.04290  0.23523 
## 
## Branch length transformations:
## 
## kappa  [Fix]  : 1.000
## lambda [ ML]  : 0.991
##    lower bound : 0.000, p = < 2.22e-16
##    upper bound : 1.000, p = < 2.22e-16
##    95.0% CI   : (0.989, 0.993)
## delta  [Fix]  : 1.000
## 
## Coefficients:
##                            Estimate Std. Error t value  Pr(>|t|)    
## (Intercept)              -2.5579207  0.4665870 -5.4822 4.875e-08 ***
## log(DispDist_notInputed) -0.0064216  0.0059138 -1.0859    0.2777    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.0641 on 1598 degrees of freedom
## Multiple R-squared: 0.0007373,   Adjusted R-squared: 0.000112 
## F-statistic: 1.179 on 1 and 1598 DF,  p-value: 0.2777
plot(modspRateDisp)

# modpirateDisp <- pgls(log(EstPi) ~ log(tipRate) + log(DispDist_notInputed),
# data = datacomp, lambda = 'ML') saveRDS(modpirateDisp,
# paste0(folder_path,'pgls/extra/pglspirateDisp.rds'))
modpirateDisp <- readRDS(paste0(folder_path, "pgls/extra/pglspirateDisp.rds"))
summary(modpirateDisp)
## 
## Call:
## pgls(formula = log(EstPi) ~ log(tipRate) + log(DispDist_notInputed), 
##     data = datacomp, lambda = "ML")
## 
## Residuals:
##       Min        1Q    Median        3Q       Max 
## -0.302448 -0.061116  0.003635  0.063845  0.296543 
## 
## Branch length transformations:
## 
## kappa  [Fix]  : 1.000
## lambda [ ML]  : 0.504
##    lower bound : 0.000, p = < 2.22e-16
##    upper bound : 1.000, p = < 2.22e-16
##    95.0% CI   : (0.355, 0.632)
## delta  [Fix]  : 1.000
## 
## Coefficients:
##                           Estimate Std. Error t value  Pr(>|t|)    
## (Intercept)              -4.829887   0.549434 -8.7907 < 2.2e-16 ***
## log(tipRate)             -0.394741   0.069098 -5.7128 1.323e-08 ***
## log(DispDist_notInputed) -0.008585   0.024732 -0.3471    0.7285    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.0935 on 1597 degrees of freedom
## Multiple R-squared: 0.02003, Adjusted R-squared: 0.0188 
## F-statistic: 16.32 on 2 and 1597 DF,  p-value: 9.649e-08
plot(modpirateDisp)

After phylogenetic correction, dispersal is not meaningful neither for genetic diversity nor for speciation rates.


Compare with genomic data from Buffalo 2021

Only 29 tips can be matched and with data for genetic diversity

With the data used by Buffalo 2021

buff <- read_tsv("/Users/acas/Dropbox/Post-docs/Morlon_Lab/Published_Datasets_Code/buffalo2021_paradox_variation/data/combined_data.tsv")

bgendivSpRate <- buff %>%
    mutate(species = str_replace(species, " ", "_")) %>%
    inner_join(gendivSpRate) %>%
    filter(set %in% "treeMCC") %>%
    select(species, tipRate, EstPi, diversity) %>%
    drop_na()
rownames(bgendivSpRate) <- bgendivSpRate$species

TreeSubset <- drop.tip(treeMCC, as.character(treeMCC$tip.label[which(!treeMCC$tip.label %in%
    bgendivSpRate$species)]))

datacomp <- comparative.data(data = data.frame(bgendivSpRate), phy = TreeSubset,
    names.col = "species", vcv = TRUE, na.omit = TRUE, warn.dropped = TRUE)

summary(lm(log(diversity) ~ log(tipRate), data = datacomp$data))
## 
## Call:
## lm(formula = log(diversity) ~ log(tipRate), data = datacomp$data)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -3.01202 -0.36429 -0.01766  0.56813  1.86999 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept)   -6.0183     0.5200 -11.573 5.66e-12 ***
## log(tipRate)   0.1254     0.3534   0.355    0.725    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.9998 on 27 degrees of freedom
## Multiple R-squared:  0.004643,   Adjusted R-squared:  -0.03222 
## F-statistic: 0.1259 on 1 and 27 DF,  p-value: 0.7254
modBuffDiv <- pgls(log(diversity) ~ log(tipRate), data = datacomp, lambda = "ML")
summary(modBuffDiv)
## 
## Call:
## pgls(formula = log(diversity) ~ log(tipRate), data = datacomp, 
##     lambda = "ML")
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.30329 -0.10389 -0.02971  0.03150  0.22426 
## 
## Branch length transformations:
## 
## kappa  [Fix]  : 1.000
## lambda [ ML]  : 0.972
##    lower bound : 0.000, p = 0.0076068
##    upper bound : 1.000, p = 0.22535
##    95.0% CI   : (0.598, NA)
## delta  [Fix]  : 1.000
## 
## Coefficients:
##                Estimate Std. Error t value  Pr(>|t|)    
## (Intercept)  -6.2540813  1.1483784 -5.4460 9.211e-06 ***
## log(tipRate)  0.0017352  0.3423443  0.0051     0.996    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.1302 on 27 degrees of freedom
## Multiple R-squared: 9.515e-07,   Adjusted R-squared: -0.03704 
## F-statistic: 2.569e-05 on 1 and 27 DF,  p-value: 0.996

With the same species but with the mtDNA dataset

summary(lm(log(EstPi) ~ log(tipRate), data = datacomp$data))
## 
## Call:
## lm(formula = log(EstPi) ~ log(tipRate), data = datacomp$data)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -1.3955 -0.6474  0.2172  0.7539  1.7333 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept)   -3.5826     0.4541  -7.889 1.76e-08 ***
## log(tipRate)   0.5512     0.3086   1.786   0.0853 .  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.873 on 27 degrees of freedom
## Multiple R-squared:  0.1057, Adjusted R-squared:  0.07254 
## F-statistic:  3.19 on 1 and 27 DF,  p-value: 0.08533
modEstPi <- pgls(log(EstPi) ~ log(tipRate), data = datacomp, lambda = "ML")
summary(modEstPi)
## 
## Call:
## pgls(formula = log(EstPi) ~ log(tipRate), data = datacomp, lambda = "ML")
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.20362 -0.09417  0.00968  0.05339  0.24818 
## 
## Branch length transformations:
## 
## kappa  [Fix]  : 1.000
## lambda [ ML]  : 0.956
##    lower bound : 0.000, p = 0.013477
##    upper bound : 1.000, p = 0.30822
##    95.0% CI   : (0.413, NA)
## delta  [Fix]  : 1.000
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)   
## (Intercept)  -3.11050    0.98663 -3.1527 0.003939 **
## log(tipRate)  0.55331    0.30476  1.8156 0.080556 . 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.1111 on 27 degrees of freedom
## Multiple R-squared: 0.1088,  Adjusted R-squared: 0.0758 
## F-statistic: 3.296 on 1 and 27 DF,  p-value: 0.08056
summary(lm(log(EstPi) ~ log(diversity), data = datacomp$data))
## 
## Call:
## lm(formula = log(EstPi) ~ log(diversity), data = datacomp$data)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -1.50010 -0.59841  0.02811  0.40313  1.74595 
## 
## Coefficients:
##                Estimate Std. Error t value Pr(>|t|)  
## (Intercept)     -1.6995     0.9846  -1.726   0.0958 .
## log(diversity)   0.4266     0.1571   2.715   0.0114 *
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.8183 on 27 degrees of freedom
## Multiple R-squared:  0.2144, Adjusted R-squared:  0.1853 
## F-statistic: 7.369 on 1 and 27 DF,  p-value: 0.01142
bgendivSpRate %>%
    ggplot(aes(x = diversity, y = EstPi)) + geom_point() + geom_smooth(method = "lm") +
    scale_x_log10() + scale_y_log10() + theme_bw() + labs(x = "nDNA_Buffalo", y = "mtDNA_thisStudy")

modBuffDivmtDNA <- pgls(log(EstPi) ~ log(diversity), data = datacomp, lambda = "ML")
summary(modBuffDivmtDNA)
## 
## Call:
## pgls(formula = log(EstPi) ~ log(diversity), data = datacomp, 
##     lambda = "ML")
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.20853 -0.04562  0.03095  0.07641  0.16219 
## 
## Branch length transformations:
## 
## kappa  [Fix]  : 1.000
## lambda [ ML]  : 0.733
##    lower bound : 0.000, p = 0.28287
##    upper bound : 1.000, p = 0.11808
##    95.0% CI   : (NA, NA)
## delta  [Fix]  : 1.000
## 
## Coefficients:
##                Estimate Std. Error t value Pr(>|t|)  
## (Intercept)    -2.29395    1.20116 -1.9098  0.06684 .
## log(diversity)  0.29073    0.16682  1.7428  0.09276 .
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.08583 on 27 degrees of freedom
## Multiple R-squared: 0.1011,  Adjusted R-squared: 0.06782 
## F-statistic: 3.037 on 1 and 27 DF,  p-value: 0.09276
par(mfrow = c(2, 2))
plot(modBuffDivmtDNA)

At least these two datasets are kind of correlated, but not sure what to conclude when correcting for the phylogeny gives a non-significant positive correlation.


bgendivSpRate %>%
    pivot_longer(cols = EstPi:diversity) %>%
    ggplot(aes(x = tipRate, y = value, color = name)) + geom_point() + geom_smooth(method = "lm") +
    scale_x_log10() + scale_y_log10() + labs(x = "Tip Speciation rate", y = "Diversity") +
    theme_bw() + scale_color_manual(name = "", values = hues::iwanthue(2), labels = c("nDNA_Buffalo",
    "mtDNA_thisStudy"))

With this highly filtered dataset the correlation is positive for both datasets but when corrected for phylogenetic structure is not significant.

library(ggtree)
setdiv <- bgendivSpRate %>%
    select(species) %>%
    mutate(inBuff = "yes")

nodedf <- data.frame(node = nodeid(MCCtree, setdiv$species))

ggtree(MCCtree, layout = "circular", size = 0.1) %<+% setdiv + geom_hilight(data = nodedf,
    mapping = aes(node = node), color = "darkred", size = 1, show.legend = FALSE) +
    geom_tippoint(aes(color = inBuff), na.rm = TRUE, show.legend = FALSE) + scale_colour_manual(values = "darkred",
    na.value = NA)

The dataset is not very evenly sampled across the Mammals phylogeny.


Check relationship between hyper-parameters and slope with family data

### get mammals families data
mccParF <- read.table("/Users/acas/Dropbox/Post-docs/Morlon_Lab/analyses/diversification/phylogenies/Upham/families/UphamClades_fromNathan_FBD_clades_fraction_fam.txt",
    header = T, comment.char = "") %>%
    mutate(stemAge = NA, crownAge = NA, class = "birds", rank = "family") %>%
    dplyr::rename(n = Nsp_DNAonly_fam) %>%
    left_join(read.table("/Users/acas/Dropbox/Post-docs/Morlon_Lab/analyses/diversification/phylogenies/Upham/families/families_pars.txt",
        header = T), by = "clades")

gen.div <- read.delim(paste0(folder_path, "genetic_diversity/GenDiv_subsample5Ind.txt")) %>%
    filter(EstPi > 0, !outPi %in% "out", !outTheta %in% "out")

clades <- read.delim(paste0(folder_path, "speciation_rate/input/MamPhy_5911sp_tipGenFamOrdCladeGenesSampPC.txt")) %>%
    drop_na(PC) %>%
    mutate(species = word(tiplabel, 1, 2, sep = "_"), clades = sub("^PC\\d+_", "",
        PC)) %>%
    select(species, clades, ord, fam) %>%
    filter(species %in% treeMCC$tip.labels)

spRate <- read.delim("/Users/acas/Dropbox/Post-docs/Morlon_Lab/analyses/diversification/phylogenies/Upham/families/families_tipRate.txt") %>%
    rename(tipRate = rate, fam = clades) %>%
    left_join(., clades, by = c("species", "fam"))

gendivSpRate <- spRate %>%
    left_join(., select(gen.div, species, EstPi, subPi_mean, EstTheta, subTheta_mean),
        by = "species")

freqClade <- gendivSpRate %>%
    select(species, fam, EstPi) %>%
    drop_na() %>%
    group_by(fam) %>%
    tally() %>%
    filter(n > 20, fam %in% mccParF$clades)


load(paste0(folder_path, "speciation_rate/output/upham_4064sp_FR_MCCposterior100.rdata"))  #loads TreeSet
tr <- TreeSet[["treeMCC"]]

datacompClades <- list()
for (f in as.character(freqClade$fam)) {

    DataSubset <- gendivSpRate %>%
        select(species, fam, EstPi, tipRate) %>%
        filter(fam %in% f) %>%
        drop_na()

    TreeSubset <- drop.tip(tr, as.character(tr$tip.label[which(!tr$tip.label %in%
        DataSubset$species)]))

    datacompClades[[f]] <- comparative.data(data = DataSubset, phy = TreeSubset,
        names.col = "species", vcv = TRUE, na.omit = TRUE, warn.dropped = TRUE)

}

wd <- paste0(folder_path, "pgls/family/")
dir.create(wd, recursive = T)
pgls <- list()
for (clade in names(datacompClades)) {
    tryCatch({

        modpglsEstPi <- NULL
        modpglsEstPi <- pgls(log(EstPi) ~ log(tipRate), data = datacompClades[[clade]],
            lambda = "ML")

        pgls[[clade]] <- list(EstPi = modpglsEstPi)

        print(clade)
    }, error = function(e) {
        cat("Error in", conditionMessage(e), "\n")
    })
}

saveRDS(pgls, paste0(wd, "family_gendivSpRate_results_treeMCC.rds"))
wd <- paste0(folder_path, "pgls/family/")
mccParF <- read.table("/Users/acas/Dropbox/Post-docs/Morlon_Lab/analyses/diversification/phylogenies/Upham/families/UphamClades_fromNathan_FBD_clades_fraction_fam.txt",
    header = T, comment.char = "") %>%
    mutate(stemAge = NA, crownAge = NA, class = "birds", rank = "family") %>%
    dplyr::rename(n = Nsp_DNAonly_fam) %>%
    left_join(read.table("/Users/acas/Dropbox/Post-docs/Morlon_Lab/analyses/diversification/phylogenies/Upham/families/families_pars.txt",
        header = T), by = "clades")

pgls <- readRDS(paste0(wd, "family_gendivSpRate_results_treeMCC.rds"))

pglssum <- data.frame()
for (j in names(pgls)) {
    if (!is.null(pgls[[j]])) {
        pglssum <- bind_rows(pglssum, data.frame(set = str_replace(word(i, -1, sep = "_"),
            ".rds", ""), analysis = j, df = summary(pgls[[j]]$EstPi)$df[2], term = rownames(summary(pgls[[j]]$EstPi)$coefficients),
            summary(pgls[[j]]$EstPi)$coefficients, sigma = summary(pgls[[j]]$EstPi)$sigma,
            lambda = pgls[[j]]$EstPi$param.CI$lambda$opt, lambda.CIlow = pgls[[j]]$EstPi$param.CI$lambda$ci.val[1],
            lambda.CIup = pgls[[j]]$EstPi$param.CI$lambda$ci.val[2], stringsAsFactors = FALSE))
    }
}
saveRDS(pglssum, paste0(wd, "family_gendivSpRate_PGLSresults.rds"))

pglsFam <- mccParF %>%
    rename(analysis = clades) %>%
    right_join(pglssum, by = "analysis", suffix = c(".clads", ".pgls")) %>%
    filter(term %in% "log(tipRate)") %>%
    select(analysis, sigma.clads:lambda_0, Estimate, Nsp_complete)

pglsFam %>%
    rename(sigma = sigma.clads) %>%
    pivot_longer(-c(analysis, Estimate, Nsp_complete)) %>%
    ggplot(aes(x = value, y = Estimate)) + geom_point() + geom_smooth(method = "lm") +
    facet_wrap(~name, scales = "free") + labs(y = "slope estimate", x = "hyperparameter values")

summary(lm(data = pglsFam, Estimate ~ alpha))
## 
## Call:
## lm(formula = Estimate ~ alpha, data = pglsFam)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -4.9147 -0.5086 -0.0767  0.8215  2.7938 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)
## (Intercept)    1.299      4.827   0.269    0.791
## alpha         -1.898      5.391  -0.352    0.729
## 
## Residual standard error: 1.628 on 16 degrees of freedom
## Multiple R-squared:  0.007687,   Adjusted R-squared:  -0.05433 
## F-statistic: 0.1239 on 1 and 16 DF,  p-value: 0.7294

Hyperparamters don’t add any explanatory power to the slope sign or intensity.

pglsFam %>%
    ggplot(aes(x = Nsp_complete, y = Estimate)) + geom_point() + geom_smooth(method = "lm") +
    labs(y = "Slope estimate", x = "Total number of in phylogeny per clade")

summary(lm(data = pglsFam, Estimate ~ Nsp_complete))
## 
## Call:
## lm(formula = Estimate ~ Nsp_complete, data = pglsFam)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -4.9420 -0.5522 -0.0004  0.9505  2.7302 
## 
## Coefficients:
##                Estimate Std. Error t value Pr(>|t|)
## (Intercept)  -0.2991693  0.5385603  -0.555    0.586
## Nsp_complete -0.0004369  0.0017309  -0.252    0.804
## 
## Residual standard error: 1.631 on 16 degrees of freedom
## Multiple R-squared:  0.003967,   Adjusted R-squared:  -0.05829 
## F-statistic: 0.06372 on 1 and 16 DF,  p-value: 0.8039

No relationship with species richness.


If speciation rate was the response variable?

Have to use gls function with corPagel (similar to pgls from caper) because it gives an error for the optimizer

library(nlme)
dtset <- gendivSpRate %>%
    filter(set %in% "treeMCC") %>%
    drop_na(EstPi)

treeMCC <- TreeSet[["treeMCC"]]

TreeSubset <- drop.tip(treeMCC, as.character(treeMCC$tip.label[which(!treeMCC$tip.label %in%
    dtset$species)]))

fit1 <- gls(log(tipRate) ~ log(EstPi), data = dtset, correlation = corPagel(1, phy = TreeSubset,
    form = ~species), method = "ML")

summary(fit1)
## Generalized least squares fit by maximum likelihood
##   Model: log(tipRate) ~ log(EstPi) 
##   Data: dtset 
##         AIC       BIC   logLik
##   -641.2292 -619.0816 324.6146
## 
## Correlation Structure: corPagel
##  Formula: ~species 
##  Parameter estimate(s):
##   lambda 
## 0.990391 
## 
## Coefficients:
##                  Value Std.Error   t-value p-value
## (Intercept) -2.6059169 0.4615167 -5.646420  0.0000
## log(EstPi)  -0.0084258 0.0039507 -2.132721  0.0331
## 
##  Correlation: 
##            (Intr)
## log(EstPi) 0.034 
## 
## Standardized residuals:
##        Min         Q1        Med         Q3        Max 
## -0.9148429  0.9357824  1.2830346  1.6500237  3.0912831 
## 
## Residual standard error: 0.8604941 
## Degrees of freedom: 1876 total; 1874 residual